#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"

    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  return !MI.memoperands_empty() &&
           return MMO->isLoad() && MMO->isInvariant();

  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
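// resultDependsOnExec: comparisons whose results are only consumed by
// exec-mask updates (S_AND_SAVEEXEC, or S_AND reading EXEC) are treated as
// not depending on EXEC so they can be hoisted or sunk; V_READFIRSTLANE_B32
// always depends on the exec mask.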
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {
  if (MI.isCompare()) {
      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
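// areLoadsFromSameBasePtr: both opcodes must be loads with at least one
// definition; the named offset operands are then compared, with the index
// adjusted by the number of defs because SDNode operands do not include
// definitions.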
                                            int64_t &Offset1) const {

  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;

    if (!Load0Offset || !Load1Offset)

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
  OffsetIsScalable = false;

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

      Offset = EltSize * Offset0;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);

    if (BaseOp && !BaseOp->isFI())
      if (SOffset->isReg())
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  return Base1 == Base2;

                                   int64_t Offset1, bool OffsetIsScalable1,
                                   int64_t Offset2, bool OffsetIsScalable2,
                                   unsigned ClusterSize,
                                   unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;

                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
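// The heuristic above keeps scheduled load groups small: at most 16 loads
// within a 64-byte window. The helpers that follow handle copies the hardware
// cannot do directly: VGPR-to-SGPR copies are diagnosed as illegal, and on
// GFX908 (MAI insts but no gfx90a AGPR-to-AGPR move) an AGPR copy is staged
// through a reserved intermediate VGPR.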
                              const char *Msg = "illegal VGPR to SGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");

    for (auto Def = MI, E = MBB.begin(); Def != E; ) {

      if (!Def->modifiesRegister(SrcReg, &RI))

      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      for (auto I = Def; I != MI; ++I)
        I->clearRegisterKills(DefOp.getReg(), &RI);

    if (ImpUseSuperReg) {
      Builder.addReg(ImpUseSuperReg,

  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MI));

    unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

    assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
           "VGPR used for an intermediate copy should have been reserved.");

    Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

    if (ImpUseSuperReg) {
      UseBuilder.addReg(ImpUseSuperReg,

  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);
  LastMI->addRegisterKilled(SrcReg, &RI);
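// copyPhysReg selects a move opcode from the destination and source register
// classes (SGPR, VGPR, AGPR, 16-bit subregisters, SCC) and splits wide copies
// into 32- or 64-bit sub-register moves, iterating forward or backward so an
// overlapping source is never clobbered before it is read.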
                              Register SrcReg, bool KillSrc,
                              bool RenamableDest, bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {
    assert(ST.useRealTrue16Insts());
    if (DestReg == SrcReg) {
    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC_LO) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      assert(ST.hasScalarCompareEq64());

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {
    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));
    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg with an AGPR!");

    if (ST.useRealTrue16Insts()) {
    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {
    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg on VI!");

  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {
    if (ST.hasPkMovB32()) {

  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {
    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;
    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;
  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                              *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
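// insertVectorSelect materializes a select into a VGPR_32 destination: the
// branch predicate (SCC, VCC, or EXEC, in either polarity) is first turned
// into a mask in the Bool-x-EXEC register class, which then drives
// V_CNDMASK_B32.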
    return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

                                        int64_t &ImmVal) const {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOVK_I32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B16_t16_e32: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::V_MOV_B16_t16_e64: {
    if (Src0.isImm() && !MI.getOperand(1).getImm()) {
      return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_BREV_B32:
  case AMDGPU::V_BFREV_B32_e32:
  case AMDGPU::V_BFREV_B32_e64: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_NOT_B32:
  case AMDGPU::V_NOT_B32_e32:
  case AMDGPU::V_NOT_B32_e64: {
    ImmVal = static_cast<int64_t>(~static_cast<int32_t>(Src0.getImm()));
    return MI.getOperand(0).getReg() == Reg;
  if (RI.isAGPRClass(DstRC))
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;

                                                      bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                                          bool IsSGPR) const {
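// The indirect read/write pseudos above are chosen purely by the vector's
// size in bits (the V1 through V32 variants); register indexing always
// operates on 32-bit elements, hence the EltSize == 32 assertion that
// follows.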
  assert(EltSize == 32 && "invalid reg indexing elt size");

    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V16_SAVE;
    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;

                                     bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;
  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

                     FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = RI.getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {
    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V16_RESTORE;
    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;

                                        bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

  assert(!RI.isAGPRClass(RC));

  unsigned SpillSize = RI.getSpillSize(*RC);
                     FrameInfo.getObjectAlign(FrameIndex));

  if (RI.isSGPRClass(RC)) {
    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
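// SGPR spills and reloads must never touch M0 or EXEC, and a 4-byte virtual
// SGPR spill is constrained to SReg_32_XM0_XEXEC before it is lowered either
// to VGPR lanes (spillSGPRToVGPR) or to memory.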
                              unsigned Quantity) const {
  unsigned MaxSNopCount = 1u << ST.getSNopBits();
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, MaxSNopCount);

  auto *MF = MBB.getParent();
  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    ContBB = MBB.splitAt(MI, false);
    MBB.addSuccessor(TrapBB);
    ContBB = HaltLoopBB;

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())
    return MI.getOperand(0).getImm() + 1;
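// expandPostRAPseudo: once control flow is final, the *_term terminator
// pseudos are rewritten to their plain scalar opcodes, and the remaining
// move, indirect-indexing, and spill pseudos are expanded into real
// instructions.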
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));
  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));
  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));
  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));
  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));
  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));
  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));
  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
    int64_t Imm = MI.getOperand(1).getImm();
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    if (ST.hasMovB64()) {
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));
    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());
      if (ST.hasPkMovB32() &&
    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));
      MI.setDesc(get(AMDGPU::S_MOV_B64));
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());
    MI.eraseFromParent();
  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());
    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);
            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());
    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    if (ST.hasGetPCZeroExtension()) {
          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
      Op.setOffset(Op.getOffset() + 4);
        BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));

    MI.eraseFromParent();
  case AMDGPU::ENTER_STRICT_WWM: {
  case AMDGPU::ENTER_STRICT_WQM: {
    MI.eraseFromParent();
  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {
    for (auto &CandMO : I->operands()) {
      if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);
    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
    OffsetMO->setImm(FinalOffset);
    MI->setMemRefs(*MF, NewMMOs);
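// expandMovDPP64 splits V_MOV_B64_DPP_PSEUDO into two 32-bit DPP moves over
// sub0 and sub1; when the subtarget has the 64-bit move with DPALU_DPP
// support, the pseudo is instead rewritten in place to V_MOV_B64_dpp.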
std::pair<MachineInstr*, MachineInstr*>
  assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;

                                        AMDGPU::OpName Src0OpName,
                                        AMDGPU::OpName Src1OpName) const {
         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);
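// Commuting an instruction swaps src0 and src1 together with their source
// modifiers (and SDWA operand selects where present); a register operand may
// trade places with an immediate or frame-index operand as long as the
// resulting operand positions still satisfy the constant-bus rules.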
  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

  int64_t NonRegVal = NonRegOp1.getImm();

  NonRegOp2.setImm(NonRegVal);

                                                    unsigned OpIdx1) const {
  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&
  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&
  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {
  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");
  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                      Src1, AMDGPU::OpName::src1_modifiers);
                    AMDGPU::OpName::src1_sel);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
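// Long-branch expansion: when a branch offset does not fit in the S_CBRANCH
// immediate, the destination address is computed from S_GETPC_B64 plus
// label-difference constants, held in either the reserved long-branch SGPR
// pair or a scavenged register, and the branch becomes an indirect set-PC.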
                                          int64_t BrOffset) const {

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  if (ST.useAddPC64Inst()) {
        MCCtx.createTempSymbol("offset", true);
        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
    Offset->setVariableValue(OffsetExpr);

  assert(RS && "RegScavenger required for long branching");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);
      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    RS->enterBasicBlock(MBB);
    Scav = LongBranchReservedReg;
    RS->enterBasicBlockEnd(MBB);
    Scav = RS->scavengeRegisterBackwards(
    RS->setRegUsed(Scav);
    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();
    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:
                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I == MBB.end()) {

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:
    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;
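// Branch insertion reports 4 bytes per emitted branch, doubled on subtargets
// with the offset-0x3f hardware bug, where extra space is reserved for the
// workaround.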
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;

  if (Cond.size() != 2) {
  if (Cond[0].isImm()) {

                                  Register FalseReg, int &CondCycles,
                                  int &TrueCycles, int &FalseCycles) const {
    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.isSGPRClass(RC);
  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);
    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
        .addReg(FalseReg, 0, SubIdx)
        .addReg(TrueReg, 0, SubIdx);
        .addReg(TrueReg, 0, SubIdx)
        .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();
    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
      MI.removeOperand(Idx);

  MI.setDesc(NewDesc);

  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
                    Desc.implicit_defs().size();

  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.removeOperand(I);

                                                   unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:
  case AMDGPU::sub1_lo16:
  case AMDGPU::sub1_hi16:
  return std::nullopt;
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADAK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADAK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F16_t16_e32:
  case AMDGPU::V_FMAC_F16_fake16_e32:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAAK_F64;

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADMK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADMK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F16_t16_e32:
  case AMDGPU::V_FMAC_F16_fake16_e32:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAMK_F64;
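// foldImmediate uses the fmamk/fmaak (or madmk/madak) mappings above to fold
// a materialized constant directly into a MAC/FMA as an inline operand,
// subject to the constant-bus limit; on true16 targets the remaining operands
// may first be copied into the narrower 16-bit register class.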
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    if (HasMultipleUses) {
      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));
      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)
      if (ImmDefSize == 32 &&

    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;
      if (RI.hasVGPRs(DstRC))
      if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)

    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);
      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {

      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&

    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)

    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);
    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);

  if (HasMultipleUses)

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {

    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);

    auto CopyRegOperandToNarrowerRC =
          if (!MI.getOperand(OpNo).isReg())
          if (RI.getCommonSubClass(RC, NewRC) != NewRC)
          BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
                  get(AMDGPU::COPY), Tmp)
          MI.getOperand(OpNo).setReg(Tmp);
          MI.getOperand(OpNo).setIsKill();
          Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
      if (!RegSrc->isReg())
      if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
          ST.getConstantBusLimit(Opc) < 2)

      if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))

      if (Def && Def->isMoveImmediate() &&

        unsigned SrcSubReg = RegSrc->getSubReg();

        if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
            Opc == AMDGPU::V_FMAC_F32_e64 ||
            Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
            Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
            Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
          UseMI.untieRegOperand(
              AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

        if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
            NewOpc == AMDGPU::V_FMAMK_F16_fake16) {
                  UseMI.getDebugLoc(), get(AMDGPU::COPY),
                  UseMI.getOperand(0).getReg())
          UseMI.getOperand(0).setReg(Tmp);
          CopyRegOperandToNarrowerRC(UseMI, 1, NewRC);
          CopyRegOperandToNarrowerRC(UseMI, 3, NewRC);

        bool DeleteDef = MRI->use_nodbg_empty(Reg);
          DefMI.eraseFromParent();

      if (ST.getConstantBusLimit(Opc) < 2) {

      bool Src0Inlined = false;
      if (Src0->isReg()) {
        if (Def && Def->isMoveImmediate() &&
        } else if (ST.getConstantBusLimit(Opc) <= 1 &&

      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&
            MRI->hasOneNonDBGUse(Src1->getReg()) && commuteInstruction(UseMI))
        else if (RI.isSGPRReg(*MRI, Src1->getReg()))

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 ||
          Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      const std::optional<int64_t> SubRegImm =

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16) {
                UseMI.getDebugLoc(), get(AMDGPU::COPY),
                UseMI.getOperand(0).getReg())
        UseMI.getOperand(0).setReg(Tmp);
        CopyRegOperandToNarrowerRC(UseMI, 1, NewRC);
        CopyRegOperandToNarrowerRC(UseMI, 2, NewRC);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
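// Memory disjointness: when both instructions expose base operands and
// offsets, the accesses are disjoint if the lower access ends at or before
// the higher one begins; otherwise disjointness falls back to comparing the
// instruction kinds (DS, MUBUF, SMRD, FLAT) and their offsets.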
  if (BaseOps1.size() != BaseOps2.size())
  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;
    LocationSize Width0 = MIa.memoperands().front()->getSize();
    LocationSize Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
    if (Op.isReg() && Op.isKill())

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
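// convertToThreeAddress maps the two-address MAC/FMAC opcodes above to their
// three-address MAD/FMA forms. When an immediate-defining move becomes dead
// in the process it is downgraded to IMPLICIT_DEF, and live intervals are
// patched so the value's definition index follows the new instruction.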
  if (MI.isBundle()) {
    if (MI.getBundleSize() != 1)
    CandidateMI = MI.getNextNode();

  MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);

  if (MI.isBundle()) {
      MI.untieRegOperand(MO.getOperandNo());

    if (Def.isEarlyClobber() && Def.isReg() &&
      auto UpdateDefIndex = [&](LiveRange &LR) {
        auto *S = LR.find(OldIndex);
        if (S != LR.end() && S->start == OldIndex) {
          assert(S->valno && S->valno->def == OldIndex);
          S->start = NewIndex;
          S->valno->def = NewIndex;
      for (auto &SR : LI.subranges())

  if (U.RemoveMIUse) {
    Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
    if (MRI.hasOneNonDBGUse(DefReg)) {
      U.RemoveMIUse->setDesc(get(AMDGPU::IMPLICIT_DEF));
      U.RemoveMIUse->getOperand(0).setIsDead(true);
      for (unsigned I = U.RemoveMIUse->getNumOperands() - 1; I != 0; --I)
        U.RemoveMIUse->removeOperand(I);

      if (MI.isBundle()) {
          if (MO.isReg() && MO.getReg() == DefReg) {
            assert(MO.getSubReg() == 0 &&
                   "tied sub-registers in bundles currently not supported");
            MI.removeOperand(MO.getOperandNo());

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);
        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);
      if (MI.isBundle()) {
          if (MIOp.isReg() && MIOp.getReg() == DefReg) {
            MIOp.setIsUndef(true);
            MIOp.setReg(DummyReg);

  return MI.isBundle() ? &MI : NewMI;
                                                  ThreeAddressUpdates &U) const {
  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "

  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);
    const MachineOperand *Src0 = &MI.getOperand(Src0Idx);

  MachineInstrBuilder MIB;

  const MachineOperand *Src0Mods =
  const MachineOperand *Src1Mods =
  const MachineOperand *Src2Mods =

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
      (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
      (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
    MachineInstr *DefMI;

        MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
4421   switch (MI.getOpcode()) {
4422   case AMDGPU::S_SET_GPR_IDX_ON:
4423   case AMDGPU::S_SET_GPR_IDX_MODE:
4424   case AMDGPU::S_SET_GPR_IDX_OFF:
4442   if (MI.isTerminator() || MI.isPosition())
4446   if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
4449   if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)
4455   return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
4456          MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
4457          MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
4458          MI.getOpcode() == AMDGPU::S_SETPRIO ||
4459          MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||
4464   return Opcode == AMDGPU::DS_ORDERED_COUNT ||
4465          Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
4466          Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);
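// The predicates below answer "may this instruction touch scratch through a
// flat address?" style questions: they return early when the function is
// marked "amdgpu-no-flat-scratch-init" or the instruction has no memory
// operands, and otherwise classify each memory operand by address space,
// consulting NoAliasAddrSpace metadata when FLAT could still alias PRIVATE.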
4475   if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))
4484   if (MI.memoperands_empty())
4489     unsigned AS = Memop->getAddrSpace();
4490     if (AS == AMDGPUAS::FLAT_ADDRESS) {
4491       const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
4492       return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
4493                         *MD, AMDGPUAS::PRIVATE_ADDRESS);
4508   if (MI.memoperands_empty())
4517     unsigned AS = Memop->getAddrSpace();
4534   if (ST.isTgSplitEnabled())
4539   if (MI.memoperands_empty())
4544     unsigned AS = Memop->getAddrSpace();
4560   unsigned Opcode = MI.getOpcode();
4575   if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4576       isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
4577       Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)
4580   if (MI.isCall() || MI.isInlineAsm())
4596   if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4597       Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4598       Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4599       Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
4607   if (MI.isMetaInstruction())
4611   if (MI.isCopyLike()) {
4612     if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))
4616     return MI.readsRegister(AMDGPU::EXEC, &RI);
4627   return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
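// Inline-constant checks: an immediate is free to encode only if it is one of
// the hardware inline constants for the operand's type (small integers, a few
// FP values, plus 1/(2*pi) when ST.hasInv2PiInlineImm()). The switches below
// dispatch on bit width and operand type, truncating to 16 or 32 bits first.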
4631 switch (Imm.getBitWidth()) {
4637 ST.hasInv2PiInlineImm());
4640 ST.hasInv2PiInlineImm());
4642 return ST.has16BitInsts() &&
4644 ST.hasInv2PiInlineImm());
4651 APInt IntImm = Imm.bitcastToAPInt();
4653 bool HasInv2Pi = ST.hasInv2PiInlineImm();
4661 return ST.has16BitInsts() &&
4664 return ST.has16BitInsts() &&
4674 switch (OperandType) {
4684     int32_t Trunc = static_cast<int32_t>(Imm);
4724     int16_t Trunc = static_cast<int16_t>(Imm);
4725     return ST.has16BitInsts() &&
4734     int16_t Trunc = static_cast<int16_t>(Imm);
4735     return ST.has16BitInsts() &&
4786 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
4792 return ST.hasVOP3Literal();
4796                                          int64_t ImmVal) const {
4799   if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
4800       OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
4801                                                    AMDGPU::OpName::src2))
4803   return RI.opCanUseInlineConstant(OpInfo.OperandType);
4815                    "unexpected imm-like operand kind");
4828   if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
4846                                   AMDGPU::OpName OpName) const {
4848   return Mods && Mods->getImm();
4861   switch (MI.getOpcode()) {
4862   default:
    return false;
4864   case AMDGPU::V_ADDC_U32_e64:
4865   case AMDGPU::V_SUBB_U32_e64:
4866   case AMDGPU::V_SUBBREV_U32_e64: {
4874 case AMDGPU::V_MAC_F16_e64:
4875 case AMDGPU::V_MAC_F32_e64:
4876 case AMDGPU::V_MAC_LEGACY_F32_e64:
4877 case AMDGPU::V_FMAC_F16_e64:
4878 case AMDGPU::V_FMAC_F16_t16_e64:
4879 case AMDGPU::V_FMAC_F16_fake16_e64:
4880 case AMDGPU::V_FMAC_F32_e64:
4881 case AMDGPU::V_FMAC_F64_e64:
4882 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4888 case AMDGPU::V_CNDMASK_B32_e64:
4894     if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||
4924         (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
4933                                            unsigned Op32) const {
4947     Inst32.add(MI.getOperand(I));
4951   int Idx = MI.getNumExplicitDefs();
4953     int OpTy = MI.getDesc().operands()[Idx++].OperandType;
4958 if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
4980 if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
4988 return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
4991 return AMDGPU::SReg_32RegClass.contains(Reg) ||
4992 AMDGPU::SReg_64RegClass.contains(Reg);
4998   return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
5010   return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
5020 switch (MO.getReg()) {
5022 case AMDGPU::VCC_LO:
5023 case AMDGPU::VCC_HI:
5025 case AMDGPU::FLAT_SCR:
5038   switch (MI.getOpcode()) {
5039   case AMDGPU::V_READLANE_B32:
5040   case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
5041   case AMDGPU::V_WRITELANE_B32:
5042   case AMDGPU::SI_SPILL_S32_TO_VGPR:
5049   if (MI.isPreISelOpcode() ||
5050       SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
5065   if (SubReg.getReg().isPhysical())
5068   return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
5079   if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
5080     ErrInfo = "illegal copy from vector register to SGPR";
5098   if (!MRI.isSSA() && MI.isCopy())
5099     return verifyCopy(MI, MRI, ErrInfo);
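// verifyInstruction: the machine-verifier hook for this target. Each check
// sets ErrInfo and fails on the first violation; the checks cover operand
// counts and register classes, immediate and inline-constant legality,
// constant-bus usage, and per-encoding rules (SDWA, DPP, MIMG, FLAT, DS,
// MOVREL and friends).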
5101 if (SIInstrInfo::isGenericOpcode(Opcode))
5104 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
5105 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
5106 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
5108 if (Src0Idx == -1) {
5110 Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
5111 Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
5112 Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
5113 Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
5118   if (!Desc.isVariadic() &&
5119       Desc.getNumOperands() != MI.getNumExplicitOperands()) {
5120     ErrInfo = "Instruction has wrong number of operands.";
5124   if (MI.isInlineAsm()) {
5137       if (!Reg.isVirtual() && !RC->contains(Reg)) {
5138         ErrInfo = "inlineasm operand has incorrect register class.";
5146   if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
5147     ErrInfo = "missing memory operand from image instruction.";
5152   for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
5155       ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
5156                 "all fp values to integers.";
5161     int16_t RegClass = getOpRegClassID(OpInfo);
5163     switch (OpInfo.OperandType) {
5165       if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
5166         ErrInfo = "Illegal immediate value for operand.";
5200         ErrInfo = "Illegal immediate value for operand.";
5207         ErrInfo = "Expected inline constant for operand.";
5222       if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
5223         ErrInfo = "Expected immediate, but got non-immediate";
5232 if (OpInfo.isGenericType())
5247 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
5249     if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
5251               RI.getSubRegisterClass(RC, MO.getSubReg())) {
5252         RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
5259       if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5260         ErrInfo = "Subtarget requires even aligned vector registers";
5265     if (RegClass != -1) {
5266       if (Reg.isVirtual())
5271         ErrInfo = "Operand has incorrect register class.";
5279     if (!ST.hasSDWA()) {
5280       ErrInfo = "SDWA is not supported on this target";
5284     for (auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5285                     AMDGPU::OpName::dst_sel}) {
5289         int64_t Imm = MO->getImm();
5291           ErrInfo = "Invalid SDWA selection";
5296     int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5298     for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5303       if (!ST.hasSDWAScalar()) {
5305         if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
5306           ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
5313               "Only reg allowed as operands in SDWA instructions on GFX9+";
5319     if (!ST.hasSDWAOmod()) {
5322       if (OMod != nullptr &&
5324         ErrInfo = "OMod not allowed in SDWA instructions on VI";
5329     if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5330         Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5331         Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5332         Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5335       unsigned Mods = Src0ModsMO->getImm();
5338         ErrInfo = "sext, abs and neg are not allowed on this instruction";
5344     if (isVOPC(BasicOpcode)) {
5345       if (!ST.hasSDWASdst() && DstIdx != -1) {
5348         if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5349           ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
5352       } else if (!ST.hasSDWAOutModsVOPC()) {
5355         if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
5356           ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
5362         if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
5363           ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
5370     if (DstUnused && DstUnused->isImm() &&
5373       if (!Dst.isReg() || !Dst.isTied()) {
5374         ErrInfo = "Dst register should have tied register";
5379           MI.getOperand(MI.findTiedOperandIdx(DstIdx));
5382             "Dst register should be tied to implicit use of preserved register";
5386         ErrInfo = "Dst register should use same physical register as preserved";
5393   if (isImage(Opcode) && !MI.mayStore()) {
5405     if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5413         AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5417       uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5418       if (RegCount > DstSize) {
5419         ErrInfo = "Image instruction returns too many registers for dst "
5428   if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5429     unsigned ConstantBusCount = 0;
5430     bool UsesLiteral = false;
5433     int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5437       LiteralVal = &MI.getOperand(ImmIdx);
5446     for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5457         } else if (!MO.isFI()) {
5464             ErrInfo = "VOP2/VOP3 instruction uses more than one literal";
5474       if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
5475             return !RI.regsOverlap(SGPRUsed, SGPR);
5484     if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5485         Opcode != AMDGPU::V_WRITELANE_B32) {
5486       ErrInfo = "VOP* instruction violates constant bus restriction";
5490     if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5491       ErrInfo = "VOP3 instruction uses literal";
5498   if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5499     unsigned SGPRCount = 0;
5502     for (int OpIdx : {Src0Idx, Src1Idx}) {
5510       if (MO.getReg() != SGPRUsed)
5515     if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5516       ErrInfo = "WRITELANE instruction violates constant bus restriction";
5523   if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5524       Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5531       ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
5541       ErrInfo = "ABS not allowed in VOP3B instructions";
5554     ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";
5561   if (Desc.isBranch()) {
5563       ErrInfo = "invalid branch target for SOPK instruction";
5570         ErrInfo = "invalid immediate for SOPK instruction";
5575         ErrInfo = "invalid immediate for SOPK instruction";
5582   if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5583       Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5584       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5585       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5586     const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5587                        Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5589     const unsigned StaticNumOps =
5590         Desc.getNumOperands() + Desc.implicit_uses().size();
5591     const unsigned NumImplicitOps = IsDst ? 2 : 1;
5597     if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5598       ErrInfo = "missing implicit register operands";
5604       if (!Dst->isUse()) {
5605         ErrInfo = "v_movreld_b32 vdst should be a use operand";
5610       if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5611           UseOpIdx != StaticNumOps + 1) {
5612         ErrInfo = "movrel implicit operands should be tied";
5619         = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5621         !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5622       ErrInfo = "src0 should be subreg of implicit vector use";
5630   if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5631     ErrInfo = "VALU instruction does not implicitly read exec mask";
5637   if (MI.mayStore() &&
5642     if (Soff && Soff->getReg() != AMDGPU::M0) {
5643       ErrInfo = "scalar stores must use m0 as offset register";
5649   if (isFLAT(MI) && !ST.hasFlatInstOffsets()) {
5651     if (Offset->getImm() != 0) {
5652       ErrInfo = "subtarget does not support offsets in flat instructions";
5657   if (isDS(MI) && !ST.hasGDS()) {
5659     if (GDSOp && GDSOp->getImm() != 0) {
5660       ErrInfo = "GDS is not supported on this subtarget";
5668     int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5669                                                AMDGPU::OpName::vaddr0);
5670     AMDGPU::OpName RSrcOpName =
5671         isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5672     int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5680       ErrInfo = "dim is out of range";
5685     if (ST.hasR128A16()) {
5687       IsA16 = R128A16->getImm() != 0;
5688     } else if (ST.hasA16()) {
5690       IsA16 = A16->getImm() != 0;
5693     bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5695     unsigned AddrWords =
5698     unsigned VAddrWords;
5700       VAddrWords = RsrcIdx - VAddr0Idx;
5701       if (ST.hasPartialNSAEncoding() &&
5703         unsigned LastVAddrIdx = RsrcIdx - 1;
5704         VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;
5712     if (VAddrWords != AddrWords) {
5714                         << " but got " << VAddrWords << "\n");
5715       ErrInfo = "bad vaddr size";
5725     unsigned DC = DppCt->getImm();
5726     if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5727         DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5728         (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5729         (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5730         (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5731         (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5732         (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5733       ErrInfo = "Invalid dpp_ctrl value";
5736     if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5738       ErrInfo = "Invalid dpp_ctrl value: "
5739                 "wavefront shifts are not supported on GFX10+";
5742     if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5744       ErrInfo = "Invalid dpp_ctrl value: "
5745                 "broadcasts are not supported on GFX10+";
5748     if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5750       if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5751           DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5752           !ST.hasGFX90AInsts()) {
5753         ErrInfo = "Invalid dpp_ctrl value: "
5754                   "row_newbroadcast/row_share is not supported before "
5758       if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5759         ErrInfo = "Invalid dpp_ctrl value: "
5760                   "row_share and row_xmask are not supported before GFX10";
5765     if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5768       ErrInfo = "Invalid dpp_ctrl value: "
5769                 "DP ALU dpp only support row_newbcast";
5776   AMDGPU::OpName DataName =
5777       isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5783   if (ST.hasGFX90AInsts()) {
5784     if (Dst && Data && !Dst->isTied() && !Data->isTied() &&
5785         (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) {
5786       ErrInfo = "Invalid register class: "
5787                 "vdata and vdst should be both VGPR or AGPR";
5790     if (Data && Data2 &&
5792       ErrInfo = "Invalid register class: "
5793                 "both data operands should be VGPR or AGPR";
5797     if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
5799         (Data2 && RI.isAGPR(MRI, Data2->getReg()))) {
5800       ErrInfo = "Invalid register class: "
5801                 "agpr loads and stores not supported on this GPU";
5807 if (ST.needsAlignedVGPRs()) {
5808     const auto isAlignedReg = [&MI, &MRI, this](AMDGPU::OpName OpName) -> bool {
5813       if (Reg.isPhysical())
5814         return !(RI.getHWRegIndex(Reg) & 1);
5816       return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5817              !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);
5820     if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5821         Opcode == AMDGPU::DS_GWS_BARRIER) {
5823       if (!isAlignedReg(AMDGPU::OpName::data0)) {
5824         ErrInfo = "Subtarget requires even aligned vector registers "
5825                   "for DS_GWS instructions";
5831       if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5832         ErrInfo = "Subtarget requires even aligned vector registers "
5833                   "for vaddr operand of image instructions";
5839 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5841     if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
5842       ErrInfo = "Invalid register class: "
5843                 "v_accvgpr_write with an SGPR is not supported on this GPU";
5848   if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5851       ErrInfo = "pseudo expects only physical SGPRs";
5858     if (!ST.hasScaleOffset()) {
5859       ErrInfo = "Subtarget does not support offset scaling";
5863       ErrInfo = "Instruction does not support offset scaling";
5872   for (unsigned I = 0; I < 3; ++I) {
5878   if (ST.hasFlatScratchHiInB64InstHazard() && isSALU(MI) &&
5879       MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI, nullptr)) {
5881     if ((Dst && RI.getRegClassForReg(MRI, Dst->getReg()) ==
5882                     &AMDGPU::SReg_64RegClass) ||
5883         Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
5884       ErrInfo = "Instruction cannot read flat_scratch_base_hi";
5896   switch (MI.getOpcode()) {
5897   default: return AMDGPU::INSTRUCTION_LIST_END;
5898   case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
5899   case AMDGPU::COPY: return AMDGPU::COPY;
5900   case AMDGPU::PHI: return AMDGPU::PHI;
5901   case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
5902   case AMDGPU::WQM: return AMDGPU::WQM;
5903   case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
5904   case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
5905   case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
5906   case AMDGPU::S_MOV_B32: {
5908     return MI.getOperand(1).isReg() ||
5909            RI.isAGPR(MRI, MI.getOperand(0).getReg()) ?
5910            AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5912 case AMDGPU::S_ADD_I32:
5913 return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5914 case AMDGPU::S_ADDC_U32:
5915 return AMDGPU::V_ADDC_U32_e32;
5916 case AMDGPU::S_SUB_I32:
5917 return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5920 case AMDGPU::S_ADD_U32:
5921 return AMDGPU::V_ADD_CO_U32_e32;
5922 case AMDGPU::S_SUB_U32:
5923 return AMDGPU::V_SUB_CO_U32_e32;
5924 case AMDGPU::S_ADD_U64_PSEUDO:
5925 return AMDGPU::V_ADD_U64_PSEUDO;
5926 case AMDGPU::S_SUB_U64_PSEUDO:
5927 return AMDGPU::V_SUB_U64_PSEUDO;
5928 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5929 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5930 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5931 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5932 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5933 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5934 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5935 case AMDGPU::S_XNOR_B32:
5936 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5937 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5938 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5939 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5940 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5941 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5942 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5943 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5944 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5945 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5946 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5947 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5948 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5949 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5950 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5951 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5952 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5953 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5954 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5955 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5956 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5957 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5958 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5959 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5960 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5961 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5962 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5963 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5964 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5965 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5966 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5967 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5968 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5969 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5970 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5971 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5972 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5973 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5974 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5975 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5976 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5977 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5978 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5979 case AMDGPU::S_CVT_F32_F16:
5980 case AMDGPU::S_CVT_HI_F32_F16:
5981 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
5982 : AMDGPU::V_CVT_F32_F16_fake16_e64;
5983 case AMDGPU::S_CVT_F16_F32:
5984 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
5985 : AMDGPU::V_CVT_F16_F32_fake16_e64;
5986 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
5987 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
5988 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
5989 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
5990 case AMDGPU::S_CEIL_F16:
5991 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
5992 : AMDGPU::V_CEIL_F16_fake16_e64;
5993 case AMDGPU::S_FLOOR_F16:
5994 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
5995 : AMDGPU::V_FLOOR_F16_fake16_e64;
5996 case AMDGPU::S_TRUNC_F16:
5997 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
5998 : AMDGPU::V_TRUNC_F16_fake16_e64;
5999 case AMDGPU::S_RNDNE_F16:
6000 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
6001 : AMDGPU::V_RNDNE_F16_fake16_e64;
6002 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
6003 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
6004 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
6005 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
6006 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
6007 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
6008 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
6009 case AMDGPU::S_ADD_F16:
6010 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
6011 : AMDGPU::V_ADD_F16_fake16_e64;
6012 case AMDGPU::S_SUB_F16:
6013 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
6014 : AMDGPU::V_SUB_F16_fake16_e64;
6015 case AMDGPU::S_MIN_F16:
6016 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
6017 : AMDGPU::V_MIN_F16_fake16_e64;
6018 case AMDGPU::S_MAX_F16:
6019 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
6020 : AMDGPU::V_MAX_F16_fake16_e64;
6021 case AMDGPU::S_MINIMUM_F16:
6022 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
6023 : AMDGPU::V_MINIMUM_F16_fake16_e64;
6024 case AMDGPU::S_MAXIMUM_F16:
6025 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
6026 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
6027 case AMDGPU::S_MUL_F16:
6028 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
6029 : AMDGPU::V_MUL_F16_fake16_e64;
6030 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
6031 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
6032 case AMDGPU::S_FMAC_F16:
6033 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
6034 : AMDGPU::V_FMAC_F16_fake16_e64;
6035 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
6036 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
6037 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
6038 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
6039 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
6040 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
6041 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
6042 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
6043 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
6044 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
6045 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
6046 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
6047 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
6048 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
6049 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
6050 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
6051 case AMDGPU::S_CMP_LT_F16:
6052 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
6053 : AMDGPU::V_CMP_LT_F16_fake16_e64;
6054 case AMDGPU::S_CMP_EQ_F16:
6055 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
6056 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
6057 case AMDGPU::S_CMP_LE_F16:
6058 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
6059 : AMDGPU::V_CMP_LE_F16_fake16_e64;
6060 case AMDGPU::S_CMP_GT_F16:
6061 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
6062 : AMDGPU::V_CMP_GT_F16_fake16_e64;
6063 case AMDGPU::S_CMP_LG_F16:
6064 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
6065 : AMDGPU::V_CMP_LG_F16_fake16_e64;
6066 case AMDGPU::S_CMP_GE_F16:
6067 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
6068 : AMDGPU::V_CMP_GE_F16_fake16_e64;
6069 case AMDGPU::S_CMP_O_F16:
6070 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
6071 : AMDGPU::V_CMP_O_F16_fake16_e64;
6072 case AMDGPU::S_CMP_U_F16:
6073 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
6074 : AMDGPU::V_CMP_U_F16_fake16_e64;
6075 case AMDGPU::S_CMP_NGE_F16:
6076 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
6077 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
6078 case AMDGPU::S_CMP_NLG_F16:
6079 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
6080 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
6081 case AMDGPU::S_CMP_NGT_F16:
6082 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
6083 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
6084 case AMDGPU::S_CMP_NLE_F16:
6085 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
6086 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
6087 case AMDGPU::S_CMP_NEQ_F16:
6088 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
6089 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
6090 case AMDGPU::S_CMP_NLT_F16:
6091 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
6092 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
6093 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
6094 case AMDGPU::V_S_EXP_F16_e64:
6095 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
6096 : AMDGPU::V_EXP_F16_fake16_e64;
6097 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
6098 case AMDGPU::V_S_LOG_F16_e64:
6099 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
6100 : AMDGPU::V_LOG_F16_fake16_e64;
6101 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
6102 case AMDGPU::V_S_RCP_F16_e64:
6103 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
6104 : AMDGPU::V_RCP_F16_fake16_e64;
6105 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
6106 case AMDGPU::V_S_RSQ_F16_e64:
6107 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
6108 : AMDGPU::V_RSQ_F16_fake16_e64;
6109 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
6110 case AMDGPU::V_S_SQRT_F16_e64:
6111 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
6112 : AMDGPU::V_SQRT_F16_fake16_e64;
6115 "Unexpected scalar opcode without corresponding vector one!");
6164 "Not a whole wave func");
6167   if (MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6168       MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6175                                                       unsigned OpNo) const {
6177   if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
6178       Desc.operands()[OpNo].RegClass == -1) {
6181     if (Reg.isVirtual()) {
6183       return MRI.getRegClass(Reg);
6185     return RI.getPhysRegBaseClass(Reg);
6188   int16_t RegClass = getOpRegClassID(Desc.operands()[OpNo]);
6189   return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6197   unsigned RCID = getOpRegClassID(get(MI.getOpcode()).operands()[OpIdx]);
6199   unsigned Size = RI.getRegSizeInBits(*RC);
6200   unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6201                     : Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6202                                  : AMDGPU::V_MOV_B32_e32;
6204     Opcode = AMDGPU::COPY;
6205   else if (RI.isSGPRClass(RC))
6206     Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6220     return RI.getSubReg(SuperReg.getReg(), SubIdx);
6226   unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.getSubReg(), SubIdx);
6237   if (SubIdx == AMDGPU::sub0)
6239   if (SubIdx == AMDGPU::sub1)
6251 void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
6267   if (Reg.isPhysical())
6277     return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()) != nullptr;
6280   return RI.getCommonSubClass(DRC, RC) != nullptr;
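// Operand-legality helpers: the routines below decide whether a particular
// register (its class, sub-register and AGPR/VGPR kind) may occupy a given
// operand slot. The legalizer uses these answers to decide when a copy, a
// readfirstlane, or a waterfall loop is needed.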
6287   unsigned Opc = MI.getOpcode();
6293   constexpr AMDGPU::OpName OpNames[] = {
6294       AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6297     int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[I]);
6298     if (static_cast<unsigned>(SrcIdx) == OpIdx &&
6308   bool IsAGPR = RI.isAGPR(MRI, MO.getReg());
6309   if (IsAGPR && !ST.hasMAIInsts())
6311   if (IsAGPR && (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
6315   const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
6316   const int DataIdx = AMDGPU::getNamedOperandIdx(
6317       Opc, isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6318   if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
6319       MI.getOperand(DataIdx).isReg() &&
6320       RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)
6322   if ((int)OpIdx == DataIdx) {
6323     if (VDstIdx != -1 &&
6324         RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6327     const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
6328     if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
6329         RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6334   if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6335       (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&
6339   if (ST.hasFlatScratchHiInB64InstHazard() &&
6346     if (Opc == AMDGPU::S_BITCMP0_B64 || Opc == AMDGPU::S_BITCMP1_B64)
6367 constexpr unsigned NumOps = 3;
6368   constexpr AMDGPU::OpName OpNames[NumOps * 2] = {
6369       AMDGPU::OpName::src0,           AMDGPU::OpName::src1,
6370       AMDGPU::OpName::src2,           AMDGPU::OpName::src0_modifiers,
6371       AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6376     int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]);
6379     MO = &MI.getOperand(SrcIdx);
6386         AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]);
6390     unsigned Mods = MI.getOperand(ModsIdx).getImm();
6394 return !OpSel && !OpSelHi;
6403 int64_t RegClass = getOpRegClassID(OpInfo);
6405       RegClass != -1 ? RI.getRegClass(RegClass) : nullptr;
6414   int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
6415   int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
6419       if (!LiteralLimit--)
6429     for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
6437         if (--ConstantBusLimit <= 0)
6449       if (!LiteralLimit--)
6451       if (--ConstantBusLimit <= 0)
6457     for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
6461       if (!Op.isReg() && !Op.isFI() && !Op.isRegMask() &&
6463           !Op.isIdenticalTo(*MO))
6473   } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6487   bool Is64BitOp = Is64BitFPOp ||
6494       (!ST.has64BitLiterals() || InstDesc.getSize() != 4))
6503 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6521 bool IsGFX950Only = ST.hasGFX950Insts();
6522 bool IsGFX940Only = ST.hasGFX940Insts();
6524 if (!IsGFX950Only && !IsGFX940Only)
6542   unsigned Opcode = MI.getOpcode();
6544 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6545 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6546 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6547 case AMDGPU::V_MQSAD_U32_U8_e64:
6548 case AMDGPU::V_PK_ADD_F16:
6549 case AMDGPU::V_PK_ADD_F32:
6550 case AMDGPU::V_PK_ADD_I16:
6551 case AMDGPU::V_PK_ADD_U16:
6552 case AMDGPU::V_PK_ASHRREV_I16:
6553 case AMDGPU::V_PK_FMA_F16:
6554 case AMDGPU::V_PK_FMA_F32:
6555 case AMDGPU::V_PK_FMAC_F16_e32:
6556 case AMDGPU::V_PK_FMAC_F16_e64:
6557 case AMDGPU::V_PK_LSHLREV_B16:
6558 case AMDGPU::V_PK_LSHRREV_B16:
6559 case AMDGPU::V_PK_MAD_I16:
6560 case AMDGPU::V_PK_MAD_U16:
6561 case AMDGPU::V_PK_MAX_F16:
6562 case AMDGPU::V_PK_MAX_I16:
6563 case AMDGPU::V_PK_MAX_U16:
6564 case AMDGPU::V_PK_MIN_F16:
6565 case AMDGPU::V_PK_MIN_I16:
6566 case AMDGPU::V_PK_MIN_U16:
6567 case AMDGPU::V_PK_MOV_B32:
6568 case AMDGPU::V_PK_MUL_F16:
6569 case AMDGPU::V_PK_MUL_F32:
6570 case AMDGPU::V_PK_MUL_LO_U16:
6571 case AMDGPU::V_PK_SUB_I16:
6572 case AMDGPU::V_PK_SUB_U16:
6573 case AMDGPU::V_QSAD_PK_U16_U8_e64:
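// legalizeOperandsVOP2: VOP2 encodings can read at most one SGPR operand (and
// on some subtargets no extra literal), so the code below either commutes the
// instruction or copies the offending operand into a register class that the
// encoding accepts.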
6582   unsigned Opc = MI.getOpcode();
6585   int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
6588   int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
6594   if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1 && Src0.isReg() &&
6601   if (Opc == AMDGPU::V_WRITELANE_B32) {
6604       Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6610       Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6620   if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
6621     int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
6622     if (!RI.isVGPR(MRI, MI.getOperand(Src2Idx).getReg()))
6634   if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
6636     Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6648   if (HasImplicitSGPR || !MI.isCommutable()) {
6665   if (CommutedOpc == -1) {
6670   MI.setDesc(get(CommutedOpc));
6674   bool Src0Kill = Src0.isKill();
6678   else if (Src1.isReg()) {
6693   unsigned Opc = MI.getOpcode();
6696       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
6697       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
6698       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
6701   if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6702       Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6703       Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6704       Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6705       Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6706       Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6707       Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6711     if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
6712       Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6717     if (VOP3Idx[2] != -1) {
6719       if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
6720         Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6729   int ConstantBusLimit = ST.getConstantBusLimit(Opc);
6730   int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6732   Register SGPRReg = findUsedSGPR(MI, VOP3Idx);
6734     SGPRsUsed.insert(SGPRReg);
6738   for (int Idx : VOP3Idx) {
6747       if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6759     if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.getReg())))
6766     if (ConstantBusLimit > 0) {
6778   if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
6779       !RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))
6785   for (unsigned I = 0; I < 3; ++I) {
6798   SRC = RI.getCommonSubClass(SRC, DstRC);
6801   unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6803   if (RI.hasAGPRs(VRC)) {
6804     VRC = RI.getEquivalentVGPRClass(VRC);
6805     Register NewSrcReg = MRI.createVirtualRegister(VRC);
6807             get(TargetOpcode::COPY), NewSrcReg)
6814             get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6820   for (unsigned i = 0; i < SubRegs; ++i) {
6821     Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6823             get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6824         .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
6830           get(AMDGPU::REG_SEQUENCE), DstReg);
6831   for (unsigned i = 0; i < SubRegs; ++i) {
6833     MIB.addImm(RI.getSubRegFromChannel(i));
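// readlaneVGPRToSGPR (above) materializes an SGPR copy of a VGPR value by
// emitting one V_READFIRSTLANE_B32 per 32-bit channel and recombining the
// results with a REG_SEQUENCE. The SMRD/FLAT legalization that follows relies
// on it: sbase/soffset and FLAT saddr operands must be scalar, so a VGPR found
// there is either read back lane-uniformly or, for FLAT, the instruction is
// switched to its VGPR-address (vaddr) form.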
6846   if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
6848     SBase->setReg(SGPR);
6851   if (SOff && !RI.isSGPRReg(MRI, SOff->getReg())) {
6859   int OldSAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
6860   if (OldSAddrIdx < 0)
6876   int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6877   if (NewVAddrIdx < 0)
6880   int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
6884   if (OldVAddrIdx >= 0) {
6886     VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg());
6898   if (OldVAddrIdx == NewVAddrIdx) {
6901     MRI.removeRegOperandFromUseList(&NewVAddr);
6902     MRI.moveOperands(&NewVAddr, &SAddr, 1);
6906     MRI.removeRegOperandFromUseList(&NewVAddr);
6907     MRI.addRegOperandToUseList(&NewVAddr);
6909     assert(OldSAddrIdx == NewVAddrIdx);
6911   if (OldVAddrIdx >= 0) {
6912     int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6913                                                AMDGPU::OpName::vdst_in);
6917     if (NewVDstIn != -1) {
6918       int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
6924     if (NewVDstIn != -1) {
6925       int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6946   if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg())))
6966   unsigned OpSubReg = Op.getSubReg();
6969       RI.getRegClassForReg(MRI, OpReg), OpSubReg);
6975   Register DstReg = MRI.createVirtualRegister(DstRC);
6985   if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6988   bool ImpDef = Def->isImplicitDef();
6989   while (!ImpDef && Def && Def->isCopy()) {
6990     if (Def->getOperand(1).getReg().isPhysical())
6992     Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6993     ImpDef = Def && Def->isImplicitDef();
6995   if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
7014   const auto *BoolXExecRC = TRI->getWaveMaskRegClass();
7020   unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
7021   unsigned NumSubRegs = RegSize / 32;
7022   Register VScalarOp = ScalarOp->getReg();
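// Waterfall loop: when an operand that must be uniform (e.g. a resource
// descriptor) lives in VGPRs, it is legalized with a loop that serializes the
// distinct values across the wave. Each iteration reads the first active
// lane's value, restricts EXEC to the lanes that match it, runs the
// instruction, and removes those lanes until none remain. Illustrative shape
// of one iteration (wave64):
//   %s    = V_READFIRSTLANE_B32 %v
//   %eq   = V_CMP_EQ_U32_e64 %s, %v
//   %save = S_AND_SAVEEXEC_B64 %eq
//   ...original instruction, now using %s...
//   $exec = S_XOR_B64_term $exec, %save   ; loop while any lane is left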
7024 if (NumSubRegs == 1) {
7025     Register CurReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7027     BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
7030     Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
7032     BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
7038       CondReg = NewCondReg;
7040       Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
7048 ScalarOp->setReg(CurReg);
7049 ScalarOp->setIsKill();
7053 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
7054 "Unhandled register size");
7056   for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
7058         MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7060         MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7063     BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
7064         .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx));
7067     BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
7068         .addReg(VScalarOp, VScalarOpUndef,
7069                 TRI->getSubRegFromChannel(Idx + 1));
7075     Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
7076     BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)
7082     Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
7083     auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64),
7086     if (NumSubRegs <= 2)
7087       Cmp.addReg(VScalarOp);
7089       Cmp.addReg(VScalarOp, VScalarOpUndef,
7090                  TRI->getSubRegFromChannel(Idx, 2));
7094       CondReg = NewCondReg;
7096       Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
7104   const auto *SScalarOpRC =
7105       TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp));
7106   Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC);
7110       BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
7111   unsigned Channel = 0;
7112   for (Register Piece : ReadlanePieces) {
7113     Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++));
7117   ScalarOp->setReg(SScalarOp);
7118   ScalarOp->setIsKill();
7122   Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
7123   MRI.setSimpleHint(SaveExec, CondReg);
7154 if (!Begin.isValid())
7156 if (!End.isValid()) {
7162   const auto *BoolXExecRC = TRI->getWaveMaskRegClass();
7170       MBB.computeRegisterLiveness(TRI, AMDGPU::SCC, MI,
7171                                   std::numeric_limits<unsigned>::max()) !=
7174     SaveSCCReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7180   Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
7189   for (auto I = Begin; I != AfterMI; I++) {
7190     for (auto &MO : I->all_uses())
7191       MRI.clearKillFlags(MO.getReg());
7216   MBB.addSuccessor(LoopBB);
7226   for (auto &Succ : RemainderBB->successors()) {
7250static std::tuple<unsigned, unsigned>
7258     TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7259                            AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7262   Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
7263   Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7264   Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7265   Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
7266   uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
7283       .addImm(AMDGPU::sub0_sub1)
7289   return std::tuple(RsrcPtr, NewSRsrc);
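// The static helper above (extractRsrcPtr) splits a VGPR resource descriptor
// into its 64-bit pointer half and a rebuilt SGPR descriptor carrying the
// default data format. legalizeOperands(), which follows, is the generic
// entry point: it fixes PHI/REG_SEQUENCE/INSERT_SUBREG register classes,
// forces image rsrc/samp and buffer soffset operands into SGPRs (falling back
// to waterfall loops), and rewrites MUBUF addressing when the descriptor
// itself is divergent.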
7326   if (MI.getOpcode() == AMDGPU::PHI) {
7328     for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
7329       if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual())
7332           MRI.getRegClass(MI.getOperand(i).getReg());
7333       if (RI.hasVectorRegisters(OpRC)) {
7347       VRC = &AMDGPU::VReg_1RegClass;
7350                 ? RI.getEquivalentAGPRClass(SRC)
7351                 : RI.getEquivalentVGPRClass(SRC);
7354                 ? RI.getEquivalentAGPRClass(VRC)
7355                 : RI.getEquivalentVGPRClass(VRC);
7363     for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
7365       if (!Op.isReg() || !Op.getReg().isVirtual())
7381   if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7384     if (RI.hasVGPRs(DstRC)) {
7388       for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
7390         if (!Op.isReg() || !Op.getReg().isVirtual())
7408   if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7413     if (DstRC != Src0RC) {
7422   if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7424     if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
7430   if (MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7431       MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7432       MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7433       MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7434       MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7435       MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7436       MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7438     if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
7451                                      ? AMDGPU::OpName::rsrc
7452                                      : AMDGPU::OpName::srsrc;
7454     if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg())))
7457     AMDGPU::OpName SampOpName =
7458         isMIMG(MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7460     if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg())))
7467   if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7469     if (!RI.isSGPRClass(MRI.getRegClass(Dest->getReg()))) {
7473       unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7474       unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7479       while (Start->getOpcode() != FrameSetupOpcode)
7482       while (End->getOpcode() != FrameDestroyOpcode)
7486       while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7487              MI.definesRegister(End->getOperand(1).getReg(), nullptr))
7495   if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7497     Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7499         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
7509   if (MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
7510       MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
7511       MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
7512       MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
7514     if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
7521   bool isSoffsetLegal = true;
7523       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::soffset);
7524   if (SoffsetIdx != -1) {
7527         !RI.isSGPRClass(MRI.getRegClass(Soffset->getReg()))) {
7528       isSoffsetLegal = false;
7532   bool isRsrcLegal = true;
7534       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
7535   if (RsrcIdx != -1) {
7538       isRsrcLegal = false;
7542   if (isRsrcLegal && isSoffsetLegal)
7566     Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7567     Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7568     Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7570     const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7571     Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
7572     Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
7574     unsigned RsrcPtr, NewSRsrc;
7581         .addReg(RsrcPtr, 0, AMDGPU::sub0)
7588         .addReg(RsrcPtr, 0, AMDGPU::sub1)
7602   } else if (!VAddr && ST.hasAddr64()) {
7606            "FIXME: Need to emit flat atomics here");
7608     unsigned RsrcPtr, NewSRsrc;
7611     Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7634       MIB.addImm(CPol->getImm());
7639       MIB.addImm(TFE->getImm());
7659     MI.removeFromParent();
7664         .addReg(RsrcPtr, 0, AMDGPU::sub0)
7666         .addReg(RsrcPtr, 0, AMDGPU::sub1)
7670     if (!isSoffsetLegal) {
7682   if (!isSoffsetLegal) {
7694       AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
7695   if (RsrcIdx != -1) {
7696     DeferredList.insert(MI);
7701   return DeferredList.contains(MI);
7711 if (!ST.useRealTrue16Insts())
7714 unsigned Opcode =
MI.getOpcode();
7718       OpIdx >= get(Opcode).getNumOperands() ||
7719       get(Opcode).operands()[OpIdx].RegClass == -1)
7723   if (!Op.isReg() || !Op.getReg().isVirtual())
7727   if (!RI.isVGPRClass(CurrRC))
7730   int16_t RCID = getOpRegClassID(get(Opcode).operands()[OpIdx]);
7732   if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7733     Op.setSubReg(AMDGPU::lo16);
7734   } else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7736     Register NewDstReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7737     Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7744     Op.setReg(NewDstReg);
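// moveToVALUImpl: worklist-driven lowering of SALU instructions to VALU. For
// each popped instruction the switch below either calls a dedicated helper
// (splitScalar64Bit*, lowerSelect, lowerScalarAbs, movePackToVALU, ...) or
// falls through to the generic path that rebuilds the instruction with its
// getVALUOp() replacement; users of the rewritten result are queued in turn.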
7756   while (!Worklist.empty()) {
7770 "Deferred MachineInstr are not supposed to re-populate worklist");
7790 case AMDGPU::S_ADD_I32:
7791 case AMDGPU::S_SUB_I32: {
7795       std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7803 case AMDGPU::S_MUL_U64:
7804 if (ST.hasVectorMulU64()) {
7805 NewOpcode = AMDGPU::V_MUL_U64_e64;
7809 splitScalarSMulU64(Worklist, Inst, MDT);
7813 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7814 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7817 splitScalarSMulPseudo(Worklist, Inst, MDT);
7821 case AMDGPU::S_AND_B64:
7822 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7826 case AMDGPU::S_OR_B64:
7827 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7831 case AMDGPU::S_XOR_B64:
7832 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7836 case AMDGPU::S_NAND_B64:
7837 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7841 case AMDGPU::S_NOR_B64:
7842 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7846 case AMDGPU::S_XNOR_B64:
7847 if (ST.hasDLInsts())
7848 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7850 splitScalar64BitXnor(Worklist, Inst, MDT);
7854 case AMDGPU::S_ANDN2_B64:
7855 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7859 case AMDGPU::S_ORN2_B64:
7860 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7864 case AMDGPU::S_BREV_B64:
7865     splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32, true);
7869 case AMDGPU::S_NOT_B64:
7870 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7874 case AMDGPU::S_BCNT1_I32_B64:
7875 splitScalar64BitBCNT(Worklist, Inst);
7879 case AMDGPU::S_BFE_I64:
7880 splitScalar64BitBFE(Worklist, Inst);
7884 case AMDGPU::S_FLBIT_I32_B64:
7885 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7888 case AMDGPU::S_FF1_I32_B64:
7889 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7893 case AMDGPU::S_LSHL_B32:
7894 if (ST.hasOnlyRevVALUShifts()) {
7895 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7899 case AMDGPU::S_ASHR_I32:
7900 if (ST.hasOnlyRevVALUShifts()) {
7901 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7905 case AMDGPU::S_LSHR_B32:
7906 if (ST.hasOnlyRevVALUShifts()) {
7907 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7911 case AMDGPU::S_LSHL_B64:
7912 if (ST.hasOnlyRevVALUShifts()) {
7914 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7915 : AMDGPU::V_LSHLREV_B64_e64;
7919 case AMDGPU::S_ASHR_I64:
7920 if (ST.hasOnlyRevVALUShifts()) {
7921 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7925 case AMDGPU::S_LSHR_B64:
7926 if (ST.hasOnlyRevVALUShifts()) {
7927 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7932 case AMDGPU::S_ABS_I32:
7933 lowerScalarAbs(Worklist, Inst);
7937 case AMDGPU::S_ABSDIFF_I32:
7938 lowerScalarAbsDiff(Worklist, Inst);
7942 case AMDGPU::S_CBRANCH_SCC0:
7943 case AMDGPU::S_CBRANCH_SCC1: {
7946 bool IsSCC = CondReg == AMDGPU::SCC;
7954 case AMDGPU::S_BFE_U64:
7955 case AMDGPU::S_BFM_B64:
7958 case AMDGPU::S_PACK_LL_B32_B16:
7959 case AMDGPU::S_PACK_LH_B32_B16:
7960 case AMDGPU::S_PACK_HL_B32_B16:
7961 case AMDGPU::S_PACK_HH_B32_B16:
7962     movePackToVALU(Worklist, MRI, Inst);
7966 case AMDGPU::S_XNOR_B32:
7967 lowerScalarXnor(Worklist, Inst);
7971 case AMDGPU::S_NAND_B32:
7972 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7976 case AMDGPU::S_NOR_B32:
7977 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7981 case AMDGPU::S_ANDN2_B32:
7982 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7986 case AMDGPU::S_ORN2_B32:
7987 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7995 case AMDGPU::S_ADD_CO_PSEUDO:
7996 case AMDGPU::S_SUB_CO_PSEUDO: {
7997     unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7998                        ? AMDGPU::V_ADDC_U32_e64
7999                        : AMDGPU::V_SUBB_U32_e64;
8000     const auto *CarryRC = RI.getWaveMaskRegClass();
8003     if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
8004       Register NewCarryReg = MRI.createVirtualRegister(CarryRC);
8011     Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
8022     addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8026   case AMDGPU::S_UADDO_PSEUDO:
8027   case AMDGPU::S_USUBO_PSEUDO: {
8033     unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
8034                        ? AMDGPU::V_ADD_CO_U32_e64
8035                        : AMDGPU::V_SUB_CO_U32_e64;
8037         RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg()));
8038     Register DestReg = MRI.createVirtualRegister(NewRC);
8046     MRI.replaceRegWith(Dest0.getReg(), DestReg);
8047     addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8051 case AMDGPU::S_LSHL1_ADD_U32:
8052 case AMDGPU::S_LSHL2_ADD_U32:
8053 case AMDGPU::S_LSHL3_ADD_U32:
8054 case AMDGPU::S_LSHL4_ADD_U32: {
8058     unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32   ? 1
8059                          : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
8060                          : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
8064         RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg()));
8065     Register DestReg = MRI.createVirtualRegister(NewRC);
8073     MRI.replaceRegWith(Dest.getReg(), DestReg);
8074     addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8078   case AMDGPU::S_CSELECT_B32:
8079   case AMDGPU::S_CSELECT_B64:
8080     lowerSelect(Worklist, Inst, MDT);
8083 case AMDGPU::S_CMP_EQ_I32:
8084 case AMDGPU::S_CMP_LG_I32:
8085 case AMDGPU::S_CMP_GT_I32:
8086 case AMDGPU::S_CMP_GE_I32:
8087 case AMDGPU::S_CMP_LT_I32:
8088 case AMDGPU::S_CMP_LE_I32:
8089 case AMDGPU::S_CMP_EQ_U32:
8090 case AMDGPU::S_CMP_LG_U32:
8091 case AMDGPU::S_CMP_GT_U32:
8092 case AMDGPU::S_CMP_GE_U32:
8093 case AMDGPU::S_CMP_LT_U32:
8094 case AMDGPU::S_CMP_LE_U32:
8095 case AMDGPU::S_CMP_EQ_U64:
8096 case AMDGPU::S_CMP_LG_U64:
8097 case AMDGPU::S_CMP_LT_F32:
8098 case AMDGPU::S_CMP_EQ_F32:
8099 case AMDGPU::S_CMP_LE_F32:
8100 case AMDGPU::S_CMP_GT_F32:
8101 case AMDGPU::S_CMP_LG_F32:
8102 case AMDGPU::S_CMP_GE_F32:
8103 case AMDGPU::S_CMP_O_F32:
8104 case AMDGPU::S_CMP_U_F32:
8105 case AMDGPU::S_CMP_NGE_F32:
8106 case AMDGPU::S_CMP_NLG_F32:
8107 case AMDGPU::S_CMP_NGT_F32:
8108 case AMDGPU::S_CMP_NLE_F32:
8109 case AMDGPU::S_CMP_NEQ_F32:
8110 case AMDGPU::S_CMP_NLT_F32: {
8111     Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
8115 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
8129 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8133 case AMDGPU::S_CMP_LT_F16:
8134 case AMDGPU::S_CMP_EQ_F16:
8135 case AMDGPU::S_CMP_LE_F16:
8136 case AMDGPU::S_CMP_GT_F16:
8137 case AMDGPU::S_CMP_LG_F16:
8138 case AMDGPU::S_CMP_GE_F16:
8139 case AMDGPU::S_CMP_O_F16:
8140 case AMDGPU::S_CMP_U_F16:
8141 case AMDGPU::S_CMP_NGE_F16:
8142 case AMDGPU::S_CMP_NLG_F16:
8143 case AMDGPU::S_CMP_NGT_F16:
8144 case AMDGPU::S_CMP_NLE_F16:
8145 case AMDGPU::S_CMP_NEQ_F16:
8146 case AMDGPU::S_CMP_NLT_F16: {
8147     Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
8169 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8173 case AMDGPU::S_CVT_HI_F32_F16: {
8174     Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8175     Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8176     if (ST.useRealTrue16Insts()) {
8181           .addReg(TmpReg, 0, AMDGPU::hi16)
8197     addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8201   case AMDGPU::S_MINIMUM_F32:
8202   case AMDGPU::S_MAXIMUM_F32: {
8203     Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8214     addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8218   case AMDGPU::S_MINIMUM_F16:
8219   case AMDGPU::S_MAXIMUM_F16: {
8220     Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
8221                                                     ? &AMDGPU::VGPR_16RegClass
8222                                                     : &AMDGPU::VGPR_32RegClass);
8234     addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8238   case AMDGPU::V_S_EXP_F16_e64:
8239   case AMDGPU::V_S_LOG_F16_e64:
8240   case AMDGPU::V_S_RCP_F16_e64:
8241   case AMDGPU::V_S_RSQ_F16_e64:
8242   case AMDGPU::V_S_SQRT_F16_e64: {
8243     Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
8244                                                     ? &AMDGPU::VGPR_16RegClass
8245                                                     : &AMDGPU::VGPR_32RegClass);
8257     addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8263 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8271 if (NewOpcode == Opcode) {
8279     Register NewDst = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8281                 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8295           RI.getCommonSubClass(NewDstRC, SrcRC)) {
8302         addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
8303         MRI.replaceRegWith(DstReg, NewDstReg);
8304         MRI.clearKillFlags(NewDstReg);
8307       if (!MRI.constrainRegClass(NewDstReg, CommonRC))
8324   if (ST.useRealTrue16Insts() && Inst.isCopy() &&
8328     if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8329       Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
8330       Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
8332               get(AMDGPU::IMPLICIT_DEF), Undef);
8334               get(AMDGPU::REG_SEQUENCE), NewDstReg)
8340       MRI.replaceRegWith(DstReg, NewDstReg);
8341       addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8343     } else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8346       Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
8347       MRI.replaceRegWith(DstReg, NewDstReg);
8348       addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8353     Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
8354     MRI.replaceRegWith(DstReg, NewDstReg);
8356     addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8366 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8367 AMDGPU::OpName::src0_modifiers) >= 0)
8371 NewInstr->addOperand(Src);
8374 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8377 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8379 NewInstr.addImm(
Size);
8380 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8384 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8389 "Scalar BFE is only implemented for constant width and offset");
8397 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8398 AMDGPU::OpName::src1_modifiers) >= 0)
8400 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8402 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8403 AMDGPU::OpName::src2_modifiers) >= 0)
8405 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8407 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8409 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8411 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8417       NewInstr->addOperand(Op);
8424     if (Op.getReg() == AMDGPU::SCC) {
8426       if (Op.isDef() && !Op.isDead())
8427         addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
8429 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8434 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8435 Register DstReg = NewInstr->getOperand(0).getReg();
8440     NewDstReg = MRI.createVirtualRegister(NewDstRC);
8441     MRI.replaceRegWith(DstReg, NewDstReg);
8450   addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
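// --------------------------------------------------------------------------
// Illustrative standalone sketch (not from SIInstrInfo.cpp): the lowering
// above keeps a worklist so that rewriting one SALU instruction revisits the
// users of every register it redefines, until a fixed point is reached. All
// names below (Node, Users, rewriteToVALU, moveToVALUModel) are hypothetical
// stand-ins for the real SIInstrWorklist/MachineInstr machinery.
#include <deque>
#include <unordered_set>
#include <vector>

struct Node {
  bool IsVALU = false;
  std::vector<Node *> Users; // instructions that read this one's result
};

inline void rewriteToVALU(Node &N) { N.IsVALU = true; } // stands in for the opcode switch

inline void moveToVALUModel(Node &Root) {
  std::deque<Node *> Worklist{&Root};
  std::unordered_set<Node *> Enqueued{&Root};
  while (!Worklist.empty()) {
    Node *N = Worklist.front();
    Worklist.pop_front();
    if (N->IsVALU)
      continue;              // already legal, nothing to propagate
    rewriteToVALU(*N);
    for (Node *U : N->Users) // mirrors addUsersToMoveToVALUWorklist
      if (Enqueued.insert(U).second)
        Worklist.push_back(U);
  }
}
// --------------------------------------------------------------------------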
8454 std::pair<bool, MachineBasicBlock *>
8466   Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8469   assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
8471   unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
8472                         AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8480   MRI.replaceRegWith(OldDstReg, ResultReg);
8483   addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8484   return std::pair(true, NewBB);
8487   return std::pair(false, nullptr);
8504 bool IsSCC = (CondReg == AMDGPU::SCC);
8512     MRI.replaceRegWith(Dest.getReg(), CondReg);
8518     const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8519     NewCondReg = MRI.createVirtualRegister(TC);
8523     bool CopyFound = false;
8524     for (MachineInstr &CandI :
8527       if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) !=
8529         if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8531               .addReg(CandI.getOperand(1).getReg());
8543       ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8551       RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg())));
8552   MachineInstr *NewInst;
8553   if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) {
8554     NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8567   MRI.replaceRegWith(Dest.getReg(), NewDestReg);
8569   addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
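// --------------------------------------------------------------------------
// Illustrative standalone sketch (not from SIInstrInfo.cpp): the select
// lowering above replaces a scalar S_CSELECT with V_CNDMASK_B32, which picks
// between the two sources per lane according to a condition mask. WaveSize
// and the function name are assumptions chosen for the example.
#include <array>
#include <cassert>
#include <cstdint>

constexpr unsigned WaveSize = 32; // wave32 configuration for the example

std::array<uint32_t, WaveSize> cndmask(const std::array<uint32_t, WaveSize> &Src0,
                                       const std::array<uint32_t, WaveSize> &Src1,
                                       uint32_t CondMask) {
  std::array<uint32_t, WaveSize> Dst{};
  for (unsigned Lane = 0; Lane < WaveSize; ++Lane)
    Dst[Lane] = ((CondMask >> Lane) & 1) ? Src1[Lane] : Src0[Lane];
  return Dst;
}

int main() {
  std::array<uint32_t, WaveSize> A{}, B{};
  for (unsigned I = 0; I < WaveSize; ++I) { A[I] = I; B[I] = 100 + I; }
  auto R = cndmask(A, B, 0x0000FFFFu); // low 16 lanes take B, high 16 take A
  assert(R[0] == 100 && R[31] == 31);
  return 0;
}
// --------------------------------------------------------------------------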
8581   Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8582   Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8584   unsigned SubOp = ST.hasAddNoCarry() ?
8585                        AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
8595   MRI.replaceRegWith(Dest.getReg(), ResultReg);
8596   addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
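// --------------------------------------------------------------------------
// Illustrative standalone sketch (not from SIInstrInfo.cpp): the scalar-abs
// lowering above builds |x| from a subtract and a signed max, i.e.
// |x| = max(x, 0 - x). The helper name is made up for the example.
#include <algorithm>
#include <cassert>
#include <cstdint>

int32_t absViaSubMax(int32_t X) {
  int32_t Neg = static_cast<int32_t>(0u - static_cast<uint32_t>(X)); // V_SUB 0, x
  return std::max(X, Neg);                                           // V_MAX_I32
}

int main() {
  for (int32_t X : {0, 1, -1, 42, -12345})
    assert(absViaSubMax(X) == (X < 0 ? -X : X));
  return 0;
}
// --------------------------------------------------------------------------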
8609   Register SubResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8610   Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8611   Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8614       ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
8626   MRI.replaceRegWith(Dest.getReg(), ResultReg);
8627   addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8641 if (ST.hasDLInsts()) {
8642     Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8650     MRI.replaceRegWith(Dest.getReg(), NewDest);
8651     addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8657     bool Src0IsSGPR = Src0.isReg() &&
8658                       RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
8659     bool Src1IsSGPR = Src1.isReg() &&
8660                       RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
8662     Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8663     Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8673     } else if (Src1IsSGPR) {
8687     MRI.replaceRegWith(Dest.getReg(), NewDest);
8691     addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
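// --------------------------------------------------------------------------
// Illustrative standalone sketch (not from SIInstrInfo.cpp): when no V_XNOR
// is available, the xnor lowering above relies on the identity
// ~(a ^ b) == (~a) ^ b == a ^ (~b), so the NOT can be kept on whichever
// operand stays scalar. This is just a check of that identity.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t Vals[] = {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu, 0x12345678u};
  for (uint32_t A : Vals)
    for (uint32_t B : Vals) {
      uint32_t Xnor = ~(A ^ B);
      assert(Xnor == ((~A) ^ B));
      assert(Xnor == (A ^ (~B)));
    }
  return 0;
}
// --------------------------------------------------------------------------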
8697                                       unsigned Opcode) const {
8707   Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8708   Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8720   MRI.replaceRegWith(Dest.getReg(), NewDest);
8721   addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8726                                      unsigned Opcode) const {
8736   Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8737   Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8749   MRI.replaceRegWith(Dest.getReg(), NewDest);
8750   addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8765   const MCInstrDesc &InstDesc = get(Opcode);
8766   const TargetRegisterClass *Src0RC = Src0.isReg() ?
8768                                           &AMDGPU::SGPR_32RegClass;
8770   const TargetRegisterClass *Src0SubRC =
8771       RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8774                                                   AMDGPU::sub0, Src0SubRC);
8776   const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8777   const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8778   const TargetRegisterClass *NewDestSubRC =
8779       RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8781   Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8782   MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
8785                                                   AMDGPU::sub1, Src0SubRC);
8787   Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8788   MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
8793   Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8800   MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8802   Worklist.insert(&LoHalf);
8803   Worklist.insert(&HiHalf);
8809   addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
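// --------------------------------------------------------------------------
// Illustrative standalone sketch (not from SIInstrInfo.cpp): the 64-bit
// unary split above applies the 32-bit opcode to the sub0 and sub1 halves and
// recombines them (the REG_SEQUENCE step). Modeled here with a lane-wise NOT;
// the helper name is an assumption.
#include <cassert>
#include <cstdint>

uint64_t not64ViaHalves(uint64_t X) {
  uint32_t Lo = static_cast<uint32_t>(X);       // sub0
  uint32_t Hi = static_cast<uint32_t>(X >> 32); // sub1
  uint32_t LoRes = ~Lo;                         // 32-bit op on the low half
  uint32_t HiRes = ~Hi;                         // 32-bit op on the high half
  return (static_cast<uint64_t>(HiRes) << 32) | LoRes; // recombine
}

int main() {
  for (uint64_t X : {0ull, 1ull, 0x00000000FFFFFFFFull, 0x123456789ABCDEF0ull})
    assert(not64ViaHalves(X) == ~X);
  return 0;
}
// --------------------------------------------------------------------------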
8820   Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8821   Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8822   Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8830   const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8831   const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8832 const TargetRegisterClass *Src0SubRC =
8833 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8834 if (RI.isSGPRClass(Src0SubRC))
8835 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8836 const TargetRegisterClass *Src1SubRC =
8837 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8838 if (RI.isSGPRClass(Src1SubRC))
8839 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8843 MachineOperand Op0L =
8845 MachineOperand Op1L =
8847 MachineOperand Op0H =
8849 MachineOperand Op1H =
8867   Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8868   MachineInstr *Op1L_Op0H =
8873   Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8874   MachineInstr *Op1H_Op0L =
8879   Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8880   MachineInstr *Carry =
8885   MachineInstr *LoHalf =
8890   Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8895   MachineInstr *HiHalf =
8906   MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8918   addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
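// --------------------------------------------------------------------------
// Illustrative standalone sketch (not from SIInstrInfo.cpp): the S_MUL_U64
// split above assembles the low 64 bits of a 64x64 product from 32x32
// partial products (a low/high product of the low halves plus the two cross
// terms and a carry into the high word). The helper name is an assumption.
#include <cassert>
#include <cstdint>

uint64_t mul64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t AL = static_cast<uint32_t>(A), AH = static_cast<uint32_t>(A >> 32);
  uint32_t BL = static_cast<uint32_t>(B), BH = static_cast<uint32_t>(B >> 32);
  uint64_t LL = static_cast<uint64_t>(AL) * BL; // low/high via MUL_LO/MUL_HI
  uint32_t Lo = static_cast<uint32_t>(LL);
  uint32_t Hi = static_cast<uint32_t>(LL >> 32) // carry term
              + AL * BH                         // cross term (low bits only)
              + AH * BL;                        // cross term (low bits only)
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}

int main() {
  uint64_t Vals[] = {0, 1, 0xFFFFFFFFull, 0x100000001ull, 0xDEADBEEFCAFEF00Dull};
  for (uint64_t A : Vals)
    for (uint64_t B : Vals)
      assert(mul64ViaHalves(A, B) == A * B);
  return 0;
}
// --------------------------------------------------------------------------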
8929   Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8930   Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8931   Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8939   const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8940   const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8941 const TargetRegisterClass *Src0SubRC =
8942 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8943 if (RI.isSGPRClass(Src0SubRC))
8944 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8945 const TargetRegisterClass *Src1SubRC =
8946 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8947 if (RI.isSGPRClass(Src1SubRC))
8948 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8952 MachineOperand Op0L =
8954 MachineOperand Op1L =
8958   unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8959                         ? AMDGPU::V_MUL_HI_U32_e64
8960                         : AMDGPU::V_MUL_HI_I32_e64;
8961   MachineInstr *HiHalf =
8964   MachineInstr *LoHalf =
8975   MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8983   addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8999   const MCInstrDesc &InstDesc = get(Opcode);
9000   const TargetRegisterClass *Src0RC = Src0.isReg() ?
9002                                           &AMDGPU::SGPR_32RegClass;
9004   const TargetRegisterClass *Src0SubRC =
9005       RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9006   const TargetRegisterClass *Src1RC = Src1.isReg() ?
9008                                           &AMDGPU::SGPR_32RegClass;
9010 const TargetRegisterClass *Src1SubRC =
9011 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9014 AMDGPU::sub0, Src0SubRC);
9016 AMDGPU::sub0, Src1SubRC);
9018 AMDGPU::sub1, Src0SubRC);
9020 AMDGPU::sub1, Src1SubRC);
9022   const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
9023   const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
9024   const TargetRegisterClass *NewDestSubRC =
9025       RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
9027   Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
9028   MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
9032   Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
9033   MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
9037   Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
9044   MRI.replaceRegWith(Dest.getReg(), FullDestReg);
9046   Worklist.insert(&LoHalf);
9047   Worklist.insert(&HiHalf);
9050   addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9066   const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
9068   Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
9070   MachineOperand *Op0;
9071   MachineOperand *Op1;
9084   Register NewDest = MRI.createVirtualRegister(DestRC);
9090   MRI.replaceRegWith(Dest.getReg(), NewDest);
9106   const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
9107 const TargetRegisterClass *SrcRC = Src.isReg() ?
9108 MRI.getRegClass(Src.getReg()) :
9109 &AMDGPU::SGPR_32RegClass;
9111   Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9112   Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9114 const TargetRegisterClass *SrcSubRC =
9115 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9118 AMDGPU::sub0, SrcSubRC);
9120 AMDGPU::sub1, SrcSubRC);
9126   MRI.replaceRegWith(Dest.getReg(), ResultReg);
9130   addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
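// --------------------------------------------------------------------------
// Illustrative standalone sketch (not from SIInstrInfo.cpp): the 64-bit BCNT
// split above chains two V_BCNT_U32_B32 operations, each adding the
// population count of one 32-bit half onto an accumulator.
#include <cassert>
#include <cstdint>

uint32_t bcnt32(uint32_t X, uint32_t Acc) { // V_BCNT_U32_B32-style semantics
  uint32_t N = 0;
  for (; X; X &= X - 1)
    ++N;
  return Acc + N;
}

uint32_t popcount64ViaHalves(uint64_t X) {
  uint32_t Mid = bcnt32(static_cast<uint32_t>(X), 0);  // low half
  return bcnt32(static_cast<uint32_t>(X >> 32), Mid);  // high half, accumulated
}

int main() {
  assert(popcount64ViaHalves(0) == 0);
  assert(popcount64ViaHalves(0xFFFFFFFFFFFFFFFFull) == 64);
  assert(popcount64ViaHalves(0x8000000000000001ull) == 2);
  return 0;
}
// --------------------------------------------------------------------------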
9149          Offset == 0 && "Not implemented");
9152     Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9153     Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9154     Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
9171     MRI.replaceRegWith(Dest.getReg(), ResultReg);
9172     addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9177     Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9178     Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
9182         .addReg(Src.getReg(), 0, AMDGPU::sub0);
9185         .addReg(Src.getReg(), 0, AMDGPU::sub0)
9190     MRI.replaceRegWith(Dest.getReg(), ResultReg);
9191     addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
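// --------------------------------------------------------------------------
// Illustrative standalone sketch (not from SIInstrInfo.cpp): the 64-bit
// signed bit-field extract handled above (offset 0 only, per the assert)
// sign-extends the low Width bits, and for Width <= 32 the high half of the
// result is just the sign bit replicated, i.e. the low half shifted right by
// 31 arithmetically. The helper name is an assumption.
#include <cassert>
#include <cstdint>

uint64_t bfeI64Offset0(uint64_t Src, unsigned Width) {
  assert(Width >= 1 && Width <= 32);
  int32_t Lo = static_cast<int32_t>(Src << (32 - Width)) >> (32 - Width); // V_BFE_I32-style
  int32_t Hi = Lo >> 31;                                                  // sign replication
  return (static_cast<uint64_t>(static_cast<uint32_t>(Hi)) << 32) |
         static_cast<uint32_t>(Lo);
}

int main() {
  assert(bfeI64Offset0(0xFF, 8) == 0xFFFFFFFFFFFFFFFFull);       // 0xFF as signed i8 == -1
  assert(bfeI64Offset0(0x7F, 8) == 0x7Full);                     // positive stays positive
  assert(bfeI64Offset0(0x80000000u, 32) == 0xFFFFFFFF80000000ull);
  return 0;
}
// --------------------------------------------------------------------------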
9210   const MCInstrDesc &InstDesc = get(Opcode);
9212   bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9213   unsigned OpcodeAdd =
9214       ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
9216   const TargetRegisterClass *SrcRC =
9217       Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9218   const TargetRegisterClass *SrcSubRC =
9219       RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9221   MachineOperand SrcRegSub0 =
9223   MachineOperand SrcRegSub1 =
9226   Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9227   Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9228   Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9229   Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9236       .addReg(IsCtlz ? MidReg1 : MidReg2)
9242       .addReg(IsCtlz ? MidReg2 : MidReg1);
9244   MRI.replaceRegWith(Dest.getReg(), MidReg4);
9246   addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
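// --------------------------------------------------------------------------
// Illustrative standalone sketch (not from SIInstrInfo.cpp): the 64-bit
// count-leading-zeros split above combines the 32-bit FFBH results of the two
// halves with an add of 32 and an unsigned min; FFBH's "no bit found" result
// of ~0u makes the min pick the correct half. Undefined for a zero input,
// like the _zero_undef form. Helper names are assumptions.
#include <cassert>
#include <cstdint>

uint32_t ffbh32(uint32_t X) { // V_FFBH_U32-style: leading zeros, ~0u when X == 0
  if (!X)
    return ~0u;
  uint32_t N = 0;
  for (uint32_t Bit = 0x80000000u; !(X & Bit); Bit >>= 1)
    ++N;
  return N;
}

uint32_t ctlz64ViaHalves(uint64_t X) {
  uint32_t Lo = static_cast<uint32_t>(X), Hi = static_cast<uint32_t>(X >> 32);
  uint32_t FromLo = ffbh32(Lo) + 32;                 // every leading zero of Hi sits above Lo
  return FromLo < ffbh32(Hi) ? FromLo : ffbh32(Hi);  // the V_MIN_U32 step
}

int main() {
  assert(ctlz64ViaHalves(1) == 63);
  assert(ctlz64ViaHalves(0x8000000000000000ull) == 0);
  assert(ctlz64ViaHalves(0x0000000100000000ull) == 31);
  return 0;
}
// --------------------------------------------------------------------------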
9249 void SIInstrInfo::addUsersToMoveToVALUWorklist(
9253     MachineInstr &UseMI = *MO.getParent();
9257     switch (UseMI.getOpcode()) {
9260 case AMDGPU::SOFT_WQM:
9261 case AMDGPU::STRICT_WWM:
9262 case AMDGPU::STRICT_WQM:
9263 case AMDGPU::REG_SEQUENCE:
9265 case AMDGPU::INSERT_SUBREG:
9268 OpNo = MO.getOperandNo();
9273 MRI.constrainRegClass(DstReg, OpRC);
9275 if (!RI.hasVectorRegisters(OpRC))
9286   Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9292   if (ST.useRealTrue16Insts()) {
9295       SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9302       SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9308     bool isSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
9309     bool isSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
9311     auto NewMI = BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), ResultReg);
9313 case AMDGPU::S_PACK_LL_B32_B16:
9316 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9317 .addImm(AMDGPU::lo16)
9319 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9320 .addImm(AMDGPU::hi16);
9322 case AMDGPU::S_PACK_LH_B32_B16:
9325 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9326 .addImm(AMDGPU::lo16)
9327 .addReg(SrcReg1, 0, AMDGPU::hi16)
9328 .addImm(AMDGPU::hi16);
9330 case AMDGPU::S_PACK_HL_B32_B16:
9331 NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
9332 .addImm(AMDGPU::lo16)
9334 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9335 .addImm(AMDGPU::hi16);
9337 case AMDGPU::S_PACK_HH_B32_B16:
9338 NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
9339 .addImm(AMDGPU::lo16)
9340 .addReg(SrcReg1, 0, AMDGPU::hi16)
9341 .addImm(AMDGPU::hi16);
9348     MRI.replaceRegWith(Dest.getReg(), ResultReg);
9349     addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9354 case AMDGPU::S_PACK_LL_B32_B16: {
9355     Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9356     Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9373   case AMDGPU::S_PACK_LH_B32_B16: {
9374     Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9383   case AMDGPU::S_PACK_HL_B32_B16: {
9384     Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9394   case AMDGPU::S_PACK_HH_B32_B16: {
9395     Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9396     Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9413   MRI.replaceRegWith(Dest.getReg(), ResultReg);
9414   addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
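// --------------------------------------------------------------------------
// Illustrative standalone sketch (not from SIInstrInfo.cpp): reference
// semantics for the four S_PACK_*_B32_B16 forms lowered above. Each result
// half is the chosen 16-bit half of the corresponding source operand; the
// enum and helper are assumptions for the example.
#include <cassert>
#include <cstdint>

enum class Pack { LL, LH, HL, HH };

uint32_t pack(Pack Kind, uint32_t Src0, uint32_t Src1) {
  auto Lo16 = [](uint32_t V) { return V & 0xFFFFu; };
  auto Hi16 = [](uint32_t V) { return V >> 16; };
  uint32_t LoHalf = (Kind == Pack::LL || Kind == Pack::LH) ? Lo16(Src0) : Hi16(Src0);
  uint32_t HiHalf = (Kind == Pack::LL || Kind == Pack::HL) ? Lo16(Src1) : Hi16(Src1);
  return LoHalf | (HiHalf << 16);
}

int main() {
  uint32_t A = 0xAAAA1111u, B = 0xBBBB2222u;
  assert(pack(Pack::LL, A, B) == 0x22221111u);
  assert(pack(Pack::LH, A, B) == 0xBBBB1111u);
  assert(pack(Pack::HL, A, B) == 0x2222AAAAu);
  assert(pack(Pack::HH, A, B) == 0xBBBBAAAAu);
  return 0;
}
// --------------------------------------------------------------------------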
9423   assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
9424          !Op.isDead() && Op.getParent() == &SCCDefInst);
9425   SmallVector<MachineInstr *, 4> CopyToDelete;
9428   for (MachineInstr &MI :
9432     int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
9435       MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
9436       Register DestReg = MI.getOperand(0).getReg();
9438       MRI.replaceRegWith(DestReg, NewCond);
9443         MI.getOperand(SCCIdx).setReg(NewCond);
9449     if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
9452   for (auto &Copy : CopyToDelete)
9453     Copy->eraseFromParent();
9461 void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
9467   for (MachineInstr &MI :
9470     if (MI.modifiesRegister(AMDGPU::VCC, &RI))
9472     if (MI.definesRegister(AMDGPU::SCC, &RI)) {
9481   const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
9489 case AMDGPU::REG_SEQUENCE:
9490 case AMDGPU::INSERT_SUBREG:
9492 case AMDGPU::SOFT_WQM:
9493 case AMDGPU::STRICT_WWM:
9494 case AMDGPU::STRICT_WQM: {
9496 if (RI.isAGPRClass(SrcRC)) {
9497 if (RI.isAGPRClass(NewDstRC))
9502 case AMDGPU::REG_SEQUENCE:
9503 case AMDGPU::INSERT_SUBREG:
9504 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9507 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9513 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9516 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9530                                   int OpIndices[3]) const {
9531   const MCInstrDesc &Desc = MI.getDesc();
9547   const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
9549   for (unsigned i = 0; i < 3; ++i) {
9550     int Idx = OpIndices[i];
9554     const MachineOperand &MO = MI.getOperand(Idx);
9560     const TargetRegisterClass *OpRC =
9561         RI.getRegClass(getOpRegClassID(Desc.operands()[Idx]));
9562     bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9568       const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
9569 if (RI.isSGPRClass(RegRC))
9587 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9588 SGPRReg = UsedSGPRs[0];
9591 if (!SGPRReg && UsedSGPRs[1]) {
9592 if (UsedSGPRs[1] == UsedSGPRs[2])
9593 SGPRReg = UsedSGPRs[1];
9600                                                  AMDGPU::OpName OperandName) const {
9601   if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9604   int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
9608   return &MI.getOperand(Idx);
9622 if (ST.isAmdHsaOS()) {
9625 RsrcDataFormat |= (1ULL << 56);
9630 RsrcDataFormat |= (2ULL << 59);
9633 return RsrcDataFormat;
9643   uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
9648   uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9655   Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
9661   unsigned Opc = MI.getOpcode();
9667   return get(Opc).mayLoad() &&
9672                                           int &FrameIndex) const {
9674   if (!Addr || !Addr->isFI())
9685                                          int &FrameIndex) const {
9693                                             int &FrameIndex) const {
9707                                            int &FrameIndex) const {
9724   while (++I != E && I->isInsideBundle()) {
9725     assert(!I->isBundle() && "No nested bundle!");
9733   unsigned Opc = MI.getOpcode();
9735   unsigned DescSize = Desc.getSize();
9740   unsigned Size = DescSize;
9744   if (MI.isBranch() && ST.hasOffset3fBug())
9755     bool HasLiteral = false;
9756     unsigned LiteralSize = 4;
9757     for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
9762         if (ST.has64BitLiterals()) {
9763           switch (OpInfo.OperandType) {
9779     return HasLiteral ? DescSize + LiteralSize : DescSize;
9784     int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
9788     int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
9789 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9793 case TargetOpcode::BUNDLE:
9795 case TargetOpcode::INLINEASM:
9796 case TargetOpcode::INLINEASM_BR: {
9798     const char *AsmStr = MI.getOperand(0).getSymbolName();
9802     if (MI.isMetaInstruction())
9806       const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
9809       unsigned LoInstOpcode = D16Info->LoOp;
9811       DescSize = Desc.getSize();
9815     if (Opc == AMDGPU::V_FMA_MIX_F16_t16 || Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
9818       DescSize = Desc.getSize();
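// --------------------------------------------------------------------------
// Illustrative standalone sketch (not from SIInstrInfo.cpp): models the
// literal handling of getInstSizeInBytes above (the
// "return HasLiteral ? DescSize + LiteralSize : DescSize" path): the encoded
// size is the descriptor size plus one extra dword for a literal operand, or
// two dwords on subtargets with 64-bit literals. Helper name and the example
// descriptor sizes are assumptions.
#include <cassert>

unsigned instSizeBytes(unsigned DescSize, bool HasLiteral, bool Is64BitLiteral) {
  if (!HasLiteral)
    return DescSize;
  unsigned LiteralSize = Is64BitLiteral ? 8 : 4;
  return DescSize + LiteralSize;
}

int main() {
  assert(instSizeBytes(4, false, false) == 4);  // inline operands only
  assert(instSizeBytes(4, true, false) == 8);   // same encoding + 32-bit literal
  assert(instSizeBytes(8, true, true) == 16);   // 64-bit literal form
  return 0;
}
// --------------------------------------------------------------------------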
9829   if (MI.memoperands_empty())
9841 static const std::pair<int, const char *> TargetIndices[] = {
9879 std::pair<unsigned, unsigned>
9886 static const std::pair<unsigned, const char *> TargetFlags[] = {
9904 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9919 return AMDGPU::WWM_COPY;
9921 return AMDGPU::COPY;
9938 if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
9942   if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
9943     return IsLRSplitInst;
9956   bool IsNullOrVectorRegister = true;
9960     IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
9963   return IsNullOrVectorRegister &&
9965          (!MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
9966           MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9974 if (ST.hasAddNoCarry())
9978   Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
9979 MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
9990 if (ST.hasAddNoCarry())
9994   Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
9996                              : RS.scavengeRegisterBackwards(
9997                                    *RI.getBoolRC(), I, false,
10010 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
10011 case AMDGPU::SI_KILL_I1_TERMINATOR:
10020 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
10021 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
10022 case AMDGPU::SI_KILL_I1_PSEUDO:
10023 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
10035 const unsigned OffsetBits =
10037 return (1 << OffsetBits) - 1;
10041 if (!ST.isWave32())
10044   if (MI.isInlineAsm())
10047   for (auto &Op : MI.implicit_operands()) {
10048     if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
10049       Op.setReg(AMDGPU::VCC_LO);
10058   int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
10062   const int16_t RCID = getOpRegClassID(MI.getDesc().operands()[Idx]);
10063 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
10079 if (Imm > MaxImm) {
10080 if (Imm <= MaxImm + 64) {
10082 Overflow = Imm - MaxImm;
10101 if (Overflow > 0) {
10109 if (ST.hasRestrictedSOffset())
10114 SOffset = Overflow;
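// --------------------------------------------------------------------------
// Illustrative standalone sketch (not from SIInstrInfo.cpp), deliberately
// simpler than splitMUBUFOffset above (it ignores the alignment handling and
// the restricted-soffset case): anything beyond the maximum encodable
// immediate moves into the soffset operand so that imm + soffset reproduces
// the original byte offset. The 4095 limit is an assumption (a 12-bit field).
#include <cassert>
#include <cstdint>

struct MUBUFOffset { uint32_t Imm; uint32_t SOffset; };

MUBUFOffset splitMUBUFOffsetModel(uint32_t Offset, uint32_t MaxImm) {
  if (Offset <= MaxImm)
    return {Offset, 0};
  return {MaxImm, Offset - MaxImm}; // overflow goes to the soffset register
}

int main() {
  const uint32_t MaxImm = 4095;
  for (uint32_t Off : {0u, 5u, 4095u, 4096u, 10000u}) {
    MUBUFOffset S = splitMUBUFOffsetModel(Off, MaxImm);
    assert(S.Imm <= MaxImm && S.Imm + S.SOffset == Off);
  }
  return 0;
}
// --------------------------------------------------------------------------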
10152 if (!ST.hasFlatInstOffsets())
10160 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10172 std::pair<int64_t, int64_t>
10175 int64_t RemainderOffset = COffsetVal;
10176 int64_t ImmField = 0;
10181 if (AllowNegative) {
10183     int64_t D = 1LL << NumBits;
10184     RemainderOffset = (COffsetVal / D) * D;
10185     ImmField = COffsetVal - RemainderOffset;
10187     if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10189         (ImmField % 4) != 0) {
10191       RemainderOffset += ImmField % 4;
10192       ImmField -= ImmField % 4;
10194   } else if (COffsetVal >= 0) {
10196 RemainderOffset = COffsetVal - ImmField;
10200 assert(RemainderOffset + ImmField == COffsetVal);
10201 return {ImmField, RemainderOffset};
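// --------------------------------------------------------------------------
// Illustrative standalone sketch (not from SIInstrInfo.cpp): models the split
// performed above, carving a signed immediate field out of a byte offset so
// that ImmField + RemainderOffset reproduces the original, with the remainder
// a multiple of the field's range when negative offsets are allowed. NumBits
// is a parameter here; the real code derives it from the subtarget.
#include <cassert>
#include <cstdint>
#include <utility>

std::pair<int64_t, int64_t> splitFlatOffsetModel(int64_t COffsetVal,
                                                 unsigned NumBits,
                                                 bool AllowNegative) {
  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;
  if (AllowNegative) {
    int64_t D = 1LL << NumBits;           // signed division truncates toward 0
    RemainderOffset = (COffsetVal / D) * D;
    ImmField = COffsetVal - RemainderOffset;
  } else if (COffsetVal >= 0) {
    ImmField = COffsetVal & ((1LL << NumBits) - 1);
    RemainderOffset = COffsetVal - ImmField;
  }
  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
}

int main() {
  for (int64_t Off : {0LL, 100LL, 5000LL, -100LL, -5000LL}) {
    auto [Imm, Rem] = splitFlatOffsetModel(Off, 12, /*AllowNegative=*/true);
    assert(Imm + Rem == Off && Imm > -(1LL << 12) && Imm < (1LL << 12));
  }
  return 0;
}
// --------------------------------------------------------------------------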
10205 if (ST.hasNegativeScratchOffsetBug() &&
10213 switch (ST.getGeneration()) {
10239 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10240 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10241 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10242 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10243 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10244 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10245 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10246 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10253#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10254 case OPCODE##_dpp: \
10255 case OPCODE##_e32: \
10256 case OPCODE##_e64: \
10257 case OPCODE##_e64_dpp: \
10258 case OPCODE##_sdwa:
10272 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10273 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10274 case AMDGPU::V_FMA_F16_gfx9_e64:
10275 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10276 case AMDGPU::V_INTERP_P2_F16:
10277 case AMDGPU::V_MAD_F16_e64:
10278 case AMDGPU::V_MAD_U16_e64:
10279 case AMDGPU::V_MAD_I16_e64:
10288 "SIInsertWaitcnts should have promoted soft waitcnt instructions!");
10302 switch (ST.getGeneration()) {
10315   if (isMAI(Opcode)) {
10323     if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
10330   if (ST.hasGFX90AInsts()) {
10332     if (ST.hasGFX940Insts())
10363   for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
10364     if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
10365       auto &RegOp = MI.getOperand(1 + 2 * I);
10377   switch (MI.getOpcode()) {
10379   case AMDGPU::REG_SEQUENCE:
10383   case AMDGPU::INSERT_SUBREG:
10384     if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
10401   if (!P.Reg.isVirtual())
10405   auto *DefInst = MRI.getVRegDef(RSR.Reg);
10406   while (auto *MI = DefInst) {
10408     switch (MI->getOpcode()) {
10410     case AMDGPU::V_MOV_B32_e32: {
10411       auto &Op1 = MI->getOperand(1);
10416       DefInst = MRI.getVRegDef(RSR.Reg);
10424       DefInst = MRI.getVRegDef(RSR.Reg);
10437   assert(MRI.isSSA() && "Must be run on SSA");
10439   auto *TRI = MRI.getTargetRegisterInfo();
10440   auto *DefBB = DefMI.getParent();
10444   if (UseMI.getParent() != DefBB)
10447   const int MaxInstScan = 20;
10451   auto E = UseMI.getIterator();
10452   for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
10453     if (I->isDebugInstr())
10456     if (++NumInst > MaxInstScan)
10459     if (I->modifiesRegister(AMDGPU::EXEC, TRI))
10469   assert(MRI.isSSA() && "Must be run on SSA");
10471   auto *TRI = MRI.getTargetRegisterInfo();
10472   auto *DefBB = DefMI.getParent();
10474   const int MaxUseScan = 10;
10477   for (auto &Use : MRI.use_nodbg_operands(VReg)) {
10478     auto &UseInst = *Use.getParent();
10481     if (UseInst.getParent() != DefBB || UseInst.isPHI())
10484     if (++NumUse > MaxUseScan)
10491   const int MaxInstScan = 20;
10495   for (auto I = std::next(DefMI.getIterator()); ; ++I) {
10498     if (I->isDebugInstr())
10501     if (++NumInst > MaxInstScan)
10514       if (Reg == VReg && --NumUse == 0)
10516     } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
10525   auto Cur = MBB.begin();
10526   if (Cur != MBB.end())
10528       if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
10531   } while (Cur != MBB.end() && Cur != LastPHIIt);
10540   if (InsPt != MBB.end() &&
10541       (InsPt->getOpcode() == AMDGPU::SI_IF ||
10542        InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10543        InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10544       InsPt->definesRegister(Src, nullptr)) {
10548         .addReg(Src, 0, SrcSubReg)
10573   if (isFullCopyInstr(MI)) {
10574     Register DstReg = MI.getOperand(0).getReg();
10575     Register SrcReg = MI.getOperand(1).getReg();
10582 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10586 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
10597                                          unsigned *PredCost) const {
10598   if (MI.isBundle()) {
10601     unsigned Lat = 0, Count = 0;
10602     for (++I; I != E && I->isBundledWithPred(); ++I) {
10604       Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
10606     return Lat + Count - 1;
10609   return SchedModel.computeInstrLatency(&MI);
10616 return *CallAddrOp;
10623   unsigned Opcode = MI.getOpcode();
10628                       : MI.getOperand(1).getReg();
10629     LLT DstTy = MRI.getType(Dst);
10630     LLT SrcTy = MRI.getType(Src);
10632 unsigned SrcAS = SrcTy.getAddressSpace();
10635 ST.hasGloballyAddressableScratch()
10643 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10644     return HandleAddrSpaceCast(MI);
10647 auto IID = GI->getIntrinsicID();
10654 case Intrinsic::amdgcn_addrspacecast_nonnull:
10655       return HandleAddrSpaceCast(MI);
10656 case Intrinsic::amdgcn_if:
10657 case Intrinsic::amdgcn_else:
10671 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10672 Opcode == AMDGPU::G_SEXTLOAD) {
10673     if (MI.memoperands_empty())
10677 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10678 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10686 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10687 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10688 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10701   unsigned opcode = MI.getOpcode();
10702 if (opcode == AMDGPU::V_READLANE_B32 ||
10703 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10704 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10707   if (isCopyInstr(MI)) {
10711         RI.getPhysRegBaseClass(srcOp.getReg());
10719   if (MI.isPreISelOpcode())
10734   if (MI.memoperands_empty())
10738 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10739 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10754   for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
10756     if (!SrcOp.isReg())
10760     if (!Reg || !SrcOp.readsReg())
10766     if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
10793         F, "ds_ordered_count unsupported for this calling conv"));
10807 Register &SrcReg2, int64_t &CmpMask,
10808                                 int64_t &CmpValue) const {
10809   if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
10812   switch (MI.getOpcode()) {
10815 case AMDGPU::S_CMP_EQ_U32:
10816 case AMDGPU::S_CMP_EQ_I32:
10817 case AMDGPU::S_CMP_LG_U32:
10818 case AMDGPU::S_CMP_LG_I32:
10819 case AMDGPU::S_CMP_LT_U32:
10820 case AMDGPU::S_CMP_LT_I32:
10821 case AMDGPU::S_CMP_GT_U32:
10822 case AMDGPU::S_CMP_GT_I32:
10823 case AMDGPU::S_CMP_LE_U32:
10824 case AMDGPU::S_CMP_LE_I32:
10825 case AMDGPU::S_CMP_GE_U32:
10826 case AMDGPU::S_CMP_GE_I32:
10827 case AMDGPU::S_CMP_EQ_U64:
10828 case AMDGPU::S_CMP_LG_U64:
10829     SrcReg = MI.getOperand(0).getReg();
10830     if (MI.getOperand(1).isReg()) {
10831       if (MI.getOperand(1).getSubReg())
10833       SrcReg2 = MI.getOperand(1).getReg();
10835     } else if (MI.getOperand(1).isImm()) {
10837       CmpValue = MI.getOperand(1).getImm();
10843 case AMDGPU::S_CMPK_EQ_U32:
10844 case AMDGPU::S_CMPK_EQ_I32:
10845 case AMDGPU::S_CMPK_LG_U32:
10846 case AMDGPU::S_CMPK_LG_I32:
10847 case AMDGPU::S_CMPK_LT_U32:
10848 case AMDGPU::S_CMPK_LT_I32:
10849 case AMDGPU::S_CMPK_GT_U32:
10850 case AMDGPU::S_CMPK_GT_I32:
10851 case AMDGPU::S_CMPK_LE_U32:
10852 case AMDGPU::S_CMPK_LE_I32:
10853 case AMDGPU::S_CMPK_GE_U32:
10854 case AMDGPU::S_CMPK_GE_I32:
10855     SrcReg = MI.getOperand(0).getReg();
10857     CmpValue = MI.getOperand(1).getImm();
10867 if (S->isLiveIn(AMDGPU::SCC))
10876 bool SIInstrInfo::invertSCCUse(MachineInstr *SCCDef) const {
10879   bool SCCIsDead = false;
10882 constexpr unsigned ScanLimit = 12;
10883 unsigned Count = 0;
10884   for (MachineInstr &MI :
10886     if (++Count > ScanLimit)
10888     if (MI.readsRegister(AMDGPU::SCC, &RI)) {
10889       if (MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
10890           MI.getOpcode() == AMDGPU::S_CSELECT_B64 ||
10891           MI.getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
10892           MI.getOpcode() == AMDGPU::S_CBRANCH_SCC1)
10897     if (MI.definesRegister(AMDGPU::SCC, &RI)) {
10910   for (MachineInstr *MI : InvertInstr) {
10911     if (MI->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10912         MI->getOpcode() == AMDGPU::S_CSELECT_B64) {
10914     } else if (MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
10915                MI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) {
10916       MI->setDesc(get(MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0
10917                           ? AMDGPU::S_CBRANCH_SCC1
10918                           : AMDGPU::S_CBRANCH_SCC0));
10931                               bool NeedInversion) const {
10932   MachineInstr *KillsSCC = nullptr;
10937     if (MI.modifiesRegister(AMDGPU::SCC, &RI))
10939     if (MI.killsRegister(AMDGPU::SCC, &RI))
10942 if (NeedInversion && !invertSCCUse(SCCRedefine))
10944 if (MachineOperand *SccDef =
10946     SccDef->setIsDead(false);
10954 if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
10955 Def.getOpcode() != AMDGPU::S_CSELECT_B64)
10957 bool Op1IsNonZeroImm =
10958 Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
10959 bool Op2IsZeroImm =
10960 Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
10961 if (!Op1IsNonZeroImm || !Op2IsZeroImm)
10967 Register SrcReg2, int64_t CmpMask,
10976   const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
10977                                   this](bool NeedInversion) -> bool {
10998 if (!optimizeSCC(Def, &CmpInstr, NeedInversion))
11008 if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
11009 MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
11015     if (Def1 && Def1->getOpcode() == AMDGPU::COPY && Def2 &&
11023       optimizeSCC(Select, Def, false);
11030   const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
11031                                this](int64_t ExpectedValue, unsigned SrcSize,
11032                                      bool IsReversible, bool IsSigned) -> bool {
11060 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
11061 Def->getOpcode() != AMDGPU::S_AND_B64)
11065     const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
11076 SrcOp = &Def->getOperand(2);
11077 else if (isMask(&Def->getOperand(2)))
11078 SrcOp = &Def->getOperand(1);
11086 if (IsSigned && BitNo == SrcSize - 1)
11089 ExpectedValue <<= BitNo;
11091     bool IsReversedCC = false;
11092 if (CmpValue != ExpectedValue) {
11095 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
11100 Register DefReg = Def->getOperand(0).getReg();
11101     if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
11104     if (!optimizeSCC(Def, &CmpInstr, false))
11107     if (!MRI->use_nodbg_empty(DefReg)) {
11115 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
11116 : AMDGPU::S_BITCMP1_B32
11117 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
11118 : AMDGPU::S_BITCMP1_B64;
11123 Def->eraseFromParent();
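// --------------------------------------------------------------------------
// Illustrative standalone sketch (not from SIInstrInfo.cpp): checks the
// equivalences optimizeCmpAnd relies on when it folds an S_AND feeding an
// S_CMP into a single S_BITCMP0/S_BITCMP1: comparing (x & (1 << n)) against
// the mask tests the bit, and comparing it against 0 tests the inverted bit
// (the IsReversedCC path above). The helper name is an assumption.
#include <cassert>
#include <cstdint>

bool bitSet(uint32_t X, unsigned N) { return (X >> N) & 1; } // S_BITCMP1-style test

int main() {
  for (uint32_t X : {0u, 1u, 0x80000000u, 0x12345678u})
    for (unsigned N : {0u, 3u, 31u}) {
      uint32_t Mask = 1u << N;
      assert(((X & Mask) == Mask) == bitSet(X, N));  // cmp_eq(and, mask) -> bitcmp1
      assert(((X & Mask) == 0) == !bitSet(X, N));    // cmp_eq(and, 0)    -> bitcmp0
    }
  return 0;
}
// --------------------------------------------------------------------------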
11131 case AMDGPU::S_CMP_EQ_U32:
11132 case AMDGPU::S_CMP_EQ_I32:
11133 case AMDGPU::S_CMPK_EQ_U32:
11134 case AMDGPU::S_CMPK_EQ_I32:
11135     return optimizeCmpAnd(1, 32, true, false) ||
11136            optimizeCmpSelect(true);
11137 case AMDGPU::S_CMP_GE_U32:
11138 case AMDGPU::S_CMPK_GE_U32:
11139     return optimizeCmpAnd(1, 32, false, false);
11140 case AMDGPU::S_CMP_GE_I32:
11141 case AMDGPU::S_CMPK_GE_I32:
11142     return optimizeCmpAnd(1, 32, false, true);
11143 case AMDGPU::S_CMP_EQ_U64:
11144     return optimizeCmpAnd(1, 64, true, false);
11145 case AMDGPU::S_CMP_LG_U32:
11146 case AMDGPU::S_CMP_LG_I32:
11147 case AMDGPU::S_CMPK_LG_U32:
11148 case AMDGPU::S_CMPK_LG_I32:
11149     return optimizeCmpAnd(0, 32, true, false) ||
11150            optimizeCmpSelect(false);
11151 case AMDGPU::S_CMP_GT_U32:
11152 case AMDGPU::S_CMPK_GT_U32:
11153     return optimizeCmpAnd(0, 32, false, false);
11154 case AMDGPU::S_CMP_GT_I32:
11155 case AMDGPU::S_CMPK_GT_I32:
11156     return optimizeCmpAnd(0, 32, false, true);
11157 case AMDGPU::S_CMP_LG_U64:
11158     return optimizeCmpAnd(0, 64, true, false) ||
11159            optimizeCmpSelect(false);
11166                                    AMDGPU::OpName OpName) const {
11167   if (!ST.needsAlignedVGPRs())
11170   int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
11182   bool IsAGPR = RI.isAGPR(MRI, DataReg);
11184 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
11187 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
11188 : &AMDGPU::VReg_64_Align2RegClass);
11190       .addReg(DataReg, 0, Op.getSubReg())
11195 Op.setSubReg(AMDGPU::sub0);
11217   unsigned Opcode = MI.getOpcode();
11223 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
11224 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
11227 if (!ST.hasGFX940Insts())
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool isSCCDeadOnExit(MachineBasicBlock *MBB)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasAddNoCarry() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
bool canAddToBBProlog(const MachineInstr &MI) const
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instruction's opcode with the equivalent VALU opcode.
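A hedged usage sketch of moveToVALU, assuming TII is a const SIInstrInfo pointer, MI is a newly divergent SALU instruction, and MDT is an optional MachineDominatorTree (may be null): the worklist collects users whose register classes must also be revisited as results move to VGPRs.
  SIInstrWorklist Worklist;
  Worklist.insert(&MI);        // seed with the instruction that must become VALU
  TII->moveToVALU(Worklist, MDT);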
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const final
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
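A minimal sketch of splitFlatOffset, assuming TII, a folded address constant COffsetVal, and that SIInstrFlags::FlatGlobal names the global flat variant: the first result fits the instruction's immediate field, the second must remain in the address computation.
  auto [ImmField, RemainderOffset] = TII->splitFlatOffset(
      COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
  // ImmField goes into the 'offset' operand; RemainderOffset is added to the address register.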
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const override
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
static bool usesLGKM_CNT(const MachineInstr &MI)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst for 16-bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
const MachineOperand & getCalleeOperand(const MachineInstr &MI) const override
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool setsSCCifResultIsNonZero(const MachineInstr &MI)
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
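A hedged sketch of how isLegalFLATOffset is typically consulted; TII, Offset, and the SIInstrFlags::FlatGlobal variant tag are assumptions here, not part of the listing above. Fold the constant only when the subtarget can encode it.
  if (!TII->isLegalFLATOffset(Offset, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal))
    Offset = 0; // keep the full offset in the address register instead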
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
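A minimal usage sketch of getNamedOperand, assuming TII is a const SIInstrInfo pointer and MI is an instruction being inspected: the lookup returns null when the opcode has no operand of that name, so the result must be checked.
  if (const MachineOperand *Off = TII->getNamedOperand(MI, AMDGPU::OpName::offset)) {
    // Only meaningful when the named operand is an immediate.
    int64_t ImmOffset = Off->getImm();
    (void)ImmOffset;
  }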
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand if it were the OpIdx operand of MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
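A minimal sketch mirroring the optional Indexes parameter taken by restoreExec and insertScratchExecCopy above; Indexes and NewMI are assumed names. Keeping the slot-index map in sync matters when instructions are inserted after live-interval analysis has run.
  if (Indexes)
    Indexes->insertMachineInstrInMaps(*NewMI);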
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual const MachineOperand & getCalleeOperand(const MachineInstr &MI) const
Returns the callee operand from the given MI.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the inline values intended for floating-point operands.
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
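A small sketch, assuming ST is the GCNSubtarget and Imm is a 64-bit constant: inline constants avoid emitting an extra literal dword in the encoding.
  bool CanInline = AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm());
  // If not inlinable, the constant must be encoded as a literal or materialized separately.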
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
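A minimal sketch of the BuildMI pattern used throughout this file, assuming MBB, an insertion iterator I, a DebugLoc DL, a 32-bit SGPR DestReg, and TII are in scope:
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), DestReg)
      .addImm(0); // materialize zero into DestReg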
constexpr unsigned getKillRegState(bool B)
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
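A hedged sketch of the usual call pattern, assuming SSA form with DefMI defining VReg and UseMI one of its uses:
  if (execMayBeModifiedBeforeUse(MRI, VReg, *DefMI, *UseMI))
    return false; // folding across an EXEC change could alter which lanes see the value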
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
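A small sketch, assuming MBB is a MachineBasicBlock being cleaned up: the early-increment range lets the loop body erase the current instruction without invalidating iteration.
  for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
    if (MI.isIdentityCopy())
      MI.eraseFromParent();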
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
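A few hand-checked illustrations of the constexpr integer helpers listed above (alignDown, isPowerOf2_64, popcount); the values are easy to verify by inspection.
  #include "llvm/ADT/bit.h"
  #include "llvm/Support/MathExtras.h"

  static_assert(llvm::alignDown(13u, 4u) == 12u, "largest multiple of 4 <= 13");
  static_assert(llvm::isPowerOf2_64(64) && !llvm::isPowerOf2_64(48), "power-of-two test");
  static_assert(llvm::popcount(0xF0u) == 4, "four bits set");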
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair, skipping copy-like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64-bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64-bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
constexpr unsigned getUndefRegState(bool B)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
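Similarly, hand-checked illustrations of the nearby masking and sign-extension helpers (maskTrailingOnes, SignExtend64, isIntN):
  #include <cstdint>
  #include "llvm/Support/MathExtras.h"

  static_assert(llvm::maskTrailingOnes<uint32_t>(12) == 0xFFFu, "low 12 bits set");
  static_assert(llvm::SignExtend64<12>(0x800) == -2048, "sign-extend from bit 11");
  static_assert(llvm::isIntN(8, 127) && !llvm::isIntN(8, 128), "8-bit signed range");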
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
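A minimal sketch of the memory-operand shape typically attached to stack-slot accesses; MF, FrameIndex, SpillSize, and SpillAlign are assumed to come from the caller's frame info, and the uint64_t size overload of getMachineMemOperand is assumed to be available.
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, SpillSize, SpillAlign);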
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.