#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc(
        "Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
  return !MI.memoperands_empty() &&
         all_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
           return MMO->isLoad() && MMO->isInvariant();
         });
  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {

  if (MI.isCompare()) {
      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:

      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {

  case AMDGPU::V_READFIRSTLANE_B32:
  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
                                            int64_t &Offset1) const {

  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;
    if (!Load0Offset || !Load1Offset)

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;
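  // DS instructions: loads define vdst; stores and atomics carry their data in
  // data0 (and data1 for the *2* forms).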
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);

        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

      Offset = EltSize * Offset0;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);

    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

    if (MO1->getAddrSpace() != MO2->getAddrSpace())

    const auto *Base1 = MO1->getValue();
    const auto *Base2 = MO2->getValue();
    if (!Base1 || !Base2)

    return Base1 == Base2;
    int64_t Offset1, bool OffsetIsScalable1,
    int64_t Offset2, bool OffsetIsScalable2,
    unsigned ClusterSize,
    unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;
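  // Only cluster while the estimated dword footprint of the whole cluster
  // stays within MaxMemoryClusterDWords.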
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
                              const char *Msg = "illegal VGPR to SGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");
  for (auto Def = MI, E = MBB.begin(); Def != E; ) {

    if (!Def->modifiesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
        Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      for (auto I = Def; I != MI; ++I)
        I->clearRegisterKills(DefOp.getReg(), &RI);

      if (ImpUseSuperReg) {
        Builder.addReg(ImpUseSuperReg,

  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MI));
  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");

  Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,
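// Expand SGPR tuple copies sub-register by sub-register, preferring S_MOV_B64
// when both the source and destination halves are even-aligned.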
  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {

      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);

    LastMI->addRegisterKilled(SrcReg, &RI);
                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if ((Size == 16) != (SrcSize == 16)) {

    assert(ST.useRealTrue16Insts());

    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);
  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                   AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC_LO) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      assert(ST.hasScalarCompareEq64());

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {

    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));
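    // 16-bit copies: classify the operands as SGPR/AGPR low halves, map them to
    // their 32-bit super-registers, and reject hi16 forms the subtarget cannot
    // encode.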
    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg with an AGPR!");

    if (ST.useRealTrue16Insts()) {

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg on VI!");
  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {

    if (ST.hasPkMovB32()) {

  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {

    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;
    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;
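  // Remaining cases are lowered sub-register by sub-register; a register
  // scavenger is only needed when no direct move opcode exists
  // (INSTRUCTION_LIST_END), and the source super-register is killed only on
  // the last piece.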
  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                            *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
                                             int64_t &ImmVal) const {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOVK_I32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B16_t16_e32: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::V_MOV_B16_t16_e64: {
    if (Src0.isImm() && !MI.getOperand(1).getImm()) {
      return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_BREV_B32:
  case AMDGPU::V_BFREV_B32_e32:
  case AMDGPU::V_BFREV_B32_e64: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_NOT_B32:
  case AMDGPU::V_NOT_B32_e32:
  case AMDGPU::V_NOT_B32_e64: {
    ImmVal = static_cast<int64_t>(~static_cast<int32_t>(Src0.getImm()));
    return MI.getOperand(0).getReg() == Reg;
  if (RI.isAGPRClass(DstRC))
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
                                                     bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1398 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1400 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1402 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1404 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1406 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1408 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
1410 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
1412 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1414 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1416 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1418 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1420 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1422 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1423 if (VecSize <= 1024)
1424 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1430 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1432 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1434 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1436 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1438 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1440 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
1442 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
1444 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1446 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1448 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1450 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1452 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1454 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1455 if (VecSize <= 1024)
1456 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1463 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1465 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1467 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1469 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1471 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1473 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1475 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1477 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1479 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1481 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1483 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1485 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1487 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1488 if (VecSize <= 1024)
1489 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1496 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1498 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1500 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1502 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1504 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1506 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1508 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1510 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1512 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1514 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1516 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1518 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1520 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1521 if (VecSize <= 1024)
1522 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1529 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1531 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1533 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1535 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1536 if (VecSize <= 1024)
1537 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
                                               bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");
1563 return AMDGPU::SI_SPILL_S32_SAVE;
1565 return AMDGPU::SI_SPILL_S64_SAVE;
1567 return AMDGPU::SI_SPILL_S96_SAVE;
1569 return AMDGPU::SI_SPILL_S128_SAVE;
1571 return AMDGPU::SI_SPILL_S160_SAVE;
1573 return AMDGPU::SI_SPILL_S192_SAVE;
1575 return AMDGPU::SI_SPILL_S224_SAVE;
1577 return AMDGPU::SI_SPILL_S256_SAVE;
1579 return AMDGPU::SI_SPILL_S288_SAVE;
1581 return AMDGPU::SI_SPILL_S320_SAVE;
1583 return AMDGPU::SI_SPILL_S352_SAVE;
1585 return AMDGPU::SI_SPILL_S384_SAVE;
1587 return AMDGPU::SI_SPILL_S512_SAVE;
1589 return AMDGPU::SI_SPILL_S1024_SAVE;
1598 return AMDGPU::SI_SPILL_V16_SAVE;
1600 return AMDGPU::SI_SPILL_V32_SAVE;
1602 return AMDGPU::SI_SPILL_V64_SAVE;
1604 return AMDGPU::SI_SPILL_V96_SAVE;
1606 return AMDGPU::SI_SPILL_V128_SAVE;
1608 return AMDGPU::SI_SPILL_V160_SAVE;
1610 return AMDGPU::SI_SPILL_V192_SAVE;
1612 return AMDGPU::SI_SPILL_V224_SAVE;
1614 return AMDGPU::SI_SPILL_V256_SAVE;
1616 return AMDGPU::SI_SPILL_V288_SAVE;
1618 return AMDGPU::SI_SPILL_V320_SAVE;
1620 return AMDGPU::SI_SPILL_V352_SAVE;
1622 return AMDGPU::SI_SPILL_V384_SAVE;
1624 return AMDGPU::SI_SPILL_V512_SAVE;
1626 return AMDGPU::SI_SPILL_V1024_SAVE;
1635 return AMDGPU::SI_SPILL_AV32_SAVE;
1637 return AMDGPU::SI_SPILL_AV64_SAVE;
1639 return AMDGPU::SI_SPILL_AV96_SAVE;
1641 return AMDGPU::SI_SPILL_AV128_SAVE;
1643 return AMDGPU::SI_SPILL_AV160_SAVE;
1645 return AMDGPU::SI_SPILL_AV192_SAVE;
1647 return AMDGPU::SI_SPILL_AV224_SAVE;
1649 return AMDGPU::SI_SPILL_AV256_SAVE;
1651 return AMDGPU::SI_SPILL_AV288_SAVE;
1653 return AMDGPU::SI_SPILL_AV320_SAVE;
1655 return AMDGPU::SI_SPILL_AV352_SAVE;
1657 return AMDGPU::SI_SPILL_AV384_SAVE;
1659 return AMDGPU::SI_SPILL_AV512_SAVE;
1661 return AMDGPU::SI_SPILL_AV1024_SAVE;
                                       bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = RI.getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {
    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
1754 return AMDGPU::SI_SPILL_S32_RESTORE;
1756 return AMDGPU::SI_SPILL_S64_RESTORE;
1758 return AMDGPU::SI_SPILL_S96_RESTORE;
1760 return AMDGPU::SI_SPILL_S128_RESTORE;
1762 return AMDGPU::SI_SPILL_S160_RESTORE;
1764 return AMDGPU::SI_SPILL_S192_RESTORE;
1766 return AMDGPU::SI_SPILL_S224_RESTORE;
1768 return AMDGPU::SI_SPILL_S256_RESTORE;
1770 return AMDGPU::SI_SPILL_S288_RESTORE;
1772 return AMDGPU::SI_SPILL_S320_RESTORE;
1774 return AMDGPU::SI_SPILL_S352_RESTORE;
1776 return AMDGPU::SI_SPILL_S384_RESTORE;
1778 return AMDGPU::SI_SPILL_S512_RESTORE;
1780 return AMDGPU::SI_SPILL_S1024_RESTORE;
1789 return AMDGPU::SI_SPILL_V16_RESTORE;
1791 return AMDGPU::SI_SPILL_V32_RESTORE;
1793 return AMDGPU::SI_SPILL_V64_RESTORE;
1795 return AMDGPU::SI_SPILL_V96_RESTORE;
1797 return AMDGPU::SI_SPILL_V128_RESTORE;
1799 return AMDGPU::SI_SPILL_V160_RESTORE;
1801 return AMDGPU::SI_SPILL_V192_RESTORE;
1803 return AMDGPU::SI_SPILL_V224_RESTORE;
1805 return AMDGPU::SI_SPILL_V256_RESTORE;
1807 return AMDGPU::SI_SPILL_V288_RESTORE;
1809 return AMDGPU::SI_SPILL_V320_RESTORE;
1811 return AMDGPU::SI_SPILL_V352_RESTORE;
1813 return AMDGPU::SI_SPILL_V384_RESTORE;
1815 return AMDGPU::SI_SPILL_V512_RESTORE;
1817 return AMDGPU::SI_SPILL_V1024_RESTORE;
1826 return AMDGPU::SI_SPILL_AV32_RESTORE;
1828 return AMDGPU::SI_SPILL_AV64_RESTORE;
1830 return AMDGPU::SI_SPILL_AV96_RESTORE;
1832 return AMDGPU::SI_SPILL_AV128_RESTORE;
1834 return AMDGPU::SI_SPILL_AV160_RESTORE;
1836 return AMDGPU::SI_SPILL_AV192_RESTORE;
1838 return AMDGPU::SI_SPILL_AV224_RESTORE;
1840 return AMDGPU::SI_SPILL_AV256_RESTORE;
1842 return AMDGPU::SI_SPILL_AV288_RESTORE;
1844 return AMDGPU::SI_SPILL_AV320_RESTORE;
1846 return AMDGPU::SI_SPILL_AV352_RESTORE;
1848 return AMDGPU::SI_SPILL_AV384_RESTORE;
1850 return AMDGPU::SI_SPILL_AV512_RESTORE;
1852 return AMDGPU::SI_SPILL_AV1024_RESTORE;
                                          bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

  assert(!RI.isAGPRClass(RC));

  unsigned SpillSize = RI.getSpillSize(*RC);

      FrameInfo.getObjectAlign(FrameIndex));

  if (RI.isSGPRClass(RC)) {
    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
                              unsigned Quantity) const {

  unsigned MaxSNopCount = 1u << ST.getSNopBits();
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, MaxSNopCount);

  auto *MF = MBB.getParent();

  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    MBB.splitAt(MI, false);

  MBB.addSuccessor(TrapBB);

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)

      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)

      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)

  return MBB.getNextNode();
  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())
    return MI.getOperand(0).getImm() + 1;
  switch (MI.getOpcode()) {

  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));

  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));

  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));

  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));

  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));

  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));

  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
    int64_t Imm = MI.getOperand(1).getImm();

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    MI.eraseFromParent();
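  // V_MOV_B64_PSEUDO: use a single 64-bit move when the subtarget has one;
  // otherwise materialize the low and high halves with separate 32-bit moves.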
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    if (ST.hasMovB64()) {
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

      if (ST.hasPkMovB32() &&

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();
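  // Indirect MOVREL writes: pick V_MOVRELD_B32 for VGPR element classes and
  // S_MOVRELD_B32/B64 for SGPR ones, then rewrite the pseudo into the real
  // indexed move.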
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {

    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    if (ST.hasGetPCZeroExtension()) {
          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
    Op.setOffset(Op.getOffset() + 4);
        BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));

    MI.eraseFromParent();

  case AMDGPU::ENTER_STRICT_WWM: {

  case AMDGPU::ENTER_STRICT_WQM: {
    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {

  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {

    for (auto &CandMO : I->operands()) {
      if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
std::pair<MachineInstr*, MachineInstr*>

  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : {AMDGPU::sub0, AMDGPU::sub1}) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
2699std::optional<DestSourcePair>
2701 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2704 return std::nullopt;
2708 AMDGPU::OpName Src0OpName,
2710 AMDGPU::OpName Src1OpName)
const {
2717 "All commutable instructions have both src0 and src1 modifiers");
2719 int Src0ModsVal = Src0Mods->
getImm();
2720 int Src1ModsVal = Src1Mods->
getImm();
2722 Src1Mods->
setImm(Src0ModsVal);
2723 Src0Mods->
setImm(Src1ModsVal);
  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

  int64_t NonRegVal = NonRegOp1.getImm();

  NonRegOp2.setImm(NonRegVal);
                                   unsigned OpIdx1) const {

  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&
  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&

  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {
  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                        Src1, AMDGPU::OpName::src1_modifiers);
                        AMDGPU::OpName::src1_sel);
2870 unsigned &SrcOpIdx0,
2871 unsigned &SrcOpIdx1)
const {
2876 unsigned &SrcOpIdx0,
2877 unsigned &SrcOpIdx1)
const {
2878 if (!
Desc.isCommutable())
2881 unsigned Opc =
Desc.getOpcode();
2882 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2886 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
2890 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
                                        int64_t BrOffset) const {

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  if (ST.useAddPC64Inst()) {
        MCCtx.createTempSymbol("offset", true);
        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
    Offset->setVariableValue(OffsetExpr);

  assert(RS && "RegScavenger required for long branching");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();

      MCCtx.createTempSymbol("post_getpc", true);

      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);

      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    RS->enterBasicBlock(MBB);
    Scav = LongBranchReservedReg;
    RS->enterBasicBlockEnd(MBB);
    Scav = RS->scavengeRegisterBackwards(

    RS->setRegUsed(Scav);
    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
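// Translate between branch predicates and the corresponding S_CBRANCH opcodes.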
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:
                                    bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I == MBB.end()) {

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:
    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;

  if (Cond.size() != 2) {
  if (Cond[0].isImm()) {

                                  Register FalseReg, int &CondCycles,
                                  int &TrueCycles, int &FalseCycles) const {

    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.isSGPRClass(RC);
  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
      AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
      AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
      AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,

  static const int16_t Sub0_15_64[] = {
      AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
      AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
      AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
      AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
3430 switch (
MI.getOpcode()) {
3431 case AMDGPU::V_MOV_B16_t16_e32:
3432 case AMDGPU::V_MOV_B16_t16_e64:
3433 case AMDGPU::V_MOV_B32_e32:
3434 case AMDGPU::V_MOV_B32_e64:
3435 case AMDGPU::V_MOV_B64_PSEUDO:
3436 case AMDGPU::V_MOV_B64_e32:
3437 case AMDGPU::V_MOV_B64_e64:
3438 case AMDGPU::S_MOV_B32:
3439 case AMDGPU::S_MOV_B64:
3440 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3442 case AMDGPU::WWM_COPY:
3443 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3444 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3445 case AMDGPU::V_ACCVGPR_MOV_B32:
3446 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3447 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3455 switch (
MI.getOpcode()) {
3456 case AMDGPU::V_MOV_B16_t16_e32:
3457 case AMDGPU::V_MOV_B16_t16_e64:
3459 case AMDGPU::V_MOV_B32_e32:
3460 case AMDGPU::V_MOV_B32_e64:
3461 case AMDGPU::V_MOV_B64_PSEUDO:
3462 case AMDGPU::V_MOV_B64_e32:
3463 case AMDGPU::V_MOV_B64_e64:
3464 case AMDGPU::S_MOV_B32:
3465 case AMDGPU::S_MOV_B64:
3466 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3468 case AMDGPU::WWM_COPY:
3469 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3470 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3471 case AMDGPU::V_ACCVGPR_MOV_B32:
3472 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3473 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();
    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
      MI.removeOperand(Idx);

  MI.setDesc(NewDesc);

  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
                    Desc.implicit_defs().size();

  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.removeOperand(I);

                                                unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:
  case AMDGPU::sub1_lo16:
  case AMDGPU::sub1_hi16:
    return std::nullopt;
3535 case AMDGPU::V_MAC_F16_e32:
3536 case AMDGPU::V_MAC_F16_e64:
3537 case AMDGPU::V_MAD_F16_e64:
3538 return AMDGPU::V_MADAK_F16;
3539 case AMDGPU::V_MAC_F32_e32:
3540 case AMDGPU::V_MAC_F32_e64:
3541 case AMDGPU::V_MAD_F32_e64:
3542 return AMDGPU::V_MADAK_F32;
3543 case AMDGPU::V_FMAC_F32_e32:
3544 case AMDGPU::V_FMAC_F32_e64:
3545 case AMDGPU::V_FMA_F32_e64:
3546 return AMDGPU::V_FMAAK_F32;
3547 case AMDGPU::V_FMAC_F16_e32:
3548 case AMDGPU::V_FMAC_F16_e64:
3549 case AMDGPU::V_FMAC_F16_t16_e64:
3550 case AMDGPU::V_FMAC_F16_fake16_e64:
3551 case AMDGPU::V_FMAC_F16_t16_e32:
3552 case AMDGPU::V_FMAC_F16_fake16_e32:
3553 case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
3558 case AMDGPU::V_FMAC_F64_e32:
3559 case AMDGPU::V_FMAC_F64_e64:
3560 case AMDGPU::V_FMA_F64_e64:
3561 return AMDGPU::V_FMAAK_F64;
3569 case AMDGPU::V_MAC_F16_e32:
3570 case AMDGPU::V_MAC_F16_e64:
3571 case AMDGPU::V_MAD_F16_e64:
3572 return AMDGPU::V_MADMK_F16;
3573 case AMDGPU::V_MAC_F32_e32:
3574 case AMDGPU::V_MAC_F32_e64:
3575 case AMDGPU::V_MAD_F32_e64:
3576 return AMDGPU::V_MADMK_F32;
3577 case AMDGPU::V_FMAC_F32_e32:
3578 case AMDGPU::V_FMAC_F32_e64:
3579 case AMDGPU::V_FMA_F32_e64:
3580 return AMDGPU::V_FMAMK_F32;
3581 case AMDGPU::V_FMAC_F16_e32:
3582 case AMDGPU::V_FMAC_F16_e64:
3583 case AMDGPU::V_FMAC_F16_t16_e64:
3584 case AMDGPU::V_FMAC_F16_fake16_e64:
3585 case AMDGPU::V_FMAC_F16_t16_e32:
3586 case AMDGPU::V_FMAC_F16_fake16_e32:
3587 case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
3592 case AMDGPU::V_FMAC_F64_e32:
3593 case AMDGPU::V_FMAC_F64_e64:
3594 case AMDGPU::V_FMA_F64_e64:
3595 return AMDGPU::V_FMAMK_F64;
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    if (HasMultipleUses) {
      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));

      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)

      if (ImmDefSize == 32 &&

    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;

    if (RI.hasVGPRs(DstRC))

    if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)

    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {

        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);

        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);

      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {

      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&

    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)

    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);
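  // Otherwise try to fold the immediate into a MAD/MAC/FMA user by rewriting
  // it to the MADAK/MADMK (FMAAK/FMAMK) form.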
  if (HasMultipleUses)

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {

    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);

    auto CopyRegOperandToNarrowerRC =
          if (!MI.getOperand(OpNo).isReg())
          if (RI.getCommonSubClass(RC, NewRC) != NewRC)
          BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
                  get(AMDGPU::COPY), Tmp)
          MI.getOperand(OpNo).setReg(Tmp);
          MI.getOperand(OpNo).setIsKill();

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())
    if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
        ST.getConstantBusLimit(Opc) < 2)

    if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))

      if (Def && Def->isMoveImmediate() &&

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16) {
                UseMI.getDebugLoc(), get(AMDGPU::COPY),
                UseMI.getOperand(0).getReg())
        UseMI.getOperand(0).setReg(Tmp);
        CopyRegOperandToNarrowerRC(UseMI, 1, NewRC);
        CopyRegOperandToNarrowerRC(UseMI, 3, NewRC);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
      if (ST.getConstantBusLimit(Opc) < 2) {

      bool Src0Inlined = false;
      if (Src0->isReg()) {

        if (Def && Def->isMoveImmediate() &&
        } else if (ST.getConstantBusLimit(Opc) <= 1 &&

      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&
            MRI->hasOneNonDBGUse(Src1->getReg()) && commuteInstruction(UseMI))
        else if (RI.isSGPRReg(*MRI, Src1->getReg()))

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      const std::optional<int64_t> SubRegImm =

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16) {
                UseMI.getDebugLoc(), get(AMDGPU::COPY),
                UseMI.getOperand(0).getReg())
        UseMI.getOperand(0).setReg(Tmp);
        CopyRegOperandToNarrowerRC(UseMI, 1, NewRC);
        CopyRegOperandToNarrowerRC(UseMI, 2, NewRC);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())
  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,

  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

  LocationSize Width0 = MIa.memoperands().front()->getSize();
  LocationSize Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
    if (Op.isReg() && Op.isKill())
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
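// convertToThreeAddress: a MAC/FMAC may arrive wrapped in a single-instruction
// bundle; convert the bundled instruction and then patch the bundle header to
// match.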
  if (MI.isBundle()) {
    if (MI.getBundleSize() != 1)
    CandidateMI = MI.getNextNode();

  MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);

  if (MI.isBundle()) {
      MI.untieRegOperand(MO.getOperandNo());

    if (Def.isEarlyClobber() && Def.isReg() &&

    auto UpdateDefIndex = [&](LiveRange &LR) {
      auto *S = LR.find(OldIndex);
      if (S != LR.end() && S->start == OldIndex) {
        assert(S->valno && S->valno->def == OldIndex);
        S->start = NewIndex;
        S->valno->def = NewIndex;

    for (auto &SR : LI.subranges())

  if (U.RemoveMIUse) {
    Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
    if (MRI.hasOneNonDBGUse(DefReg)) {
      U.RemoveMIUse->setDesc(get(AMDGPU::IMPLICIT_DEF));
      U.RemoveMIUse->getOperand(0).setIsDead(true);
      for (unsigned I = U.RemoveMIUse->getNumOperands() - 1; I != 0; --I)
        U.RemoveMIUse->removeOperand(I);

      if (MI.isBundle()) {
          if (MO.isReg() && MO.getReg() == DefReg) {
            assert(MO.getSubReg() == 0 &&
                   "tied sub-registers in bundles currently not supported");
            MI.removeOperand(MO.getOperandNo());

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);
        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);

      if (MI.isBundle()) {
          if (MIOp.isReg() && MIOp.getReg() == DefReg) {
            MIOp.setIsUndef(true);
            MIOp.setReg(DummyReg);

  return MI.isBundle() ? &MI : NewMI;
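// The conversion itself: MFMA variants are rewritten with the corresponding
// three-address MFMA opcode, while MAC/FMAC forms become MAD/FMA, or
// FMAAK/FMAMK when a foldable immediate operand allows it.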
4246 ThreeAddressUpdates &U)
const {
4248 unsigned Opc =
MI.getOpcode();
4252 if (NewMFMAOpc != -1) {
4255 for (
unsigned I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I)
4256 MIB.
add(
MI.getOperand(
I));
4264 for (
unsigned I = 0,
E =
MI.getNumExplicitOperands();
I !=
E; ++
I)
4269 assert(
Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
4270 Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
4271 "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
4275 bool IsF64 =
Opc == AMDGPU::V_FMAC_F64_e32 ||
Opc == AMDGPU::V_FMAC_F64_e64;
4276 bool IsLegacy =
Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
4277 Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
4278 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
4279 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
4280 bool Src0Literal =
false;
4285 case AMDGPU::V_MAC_F16_e64:
4286 case AMDGPU::V_FMAC_F16_e64:
4287 case AMDGPU::V_FMAC_F16_t16_e64:
4288 case AMDGPU::V_FMAC_F16_fake16_e64:
4289 case AMDGPU::V_MAC_F32_e64:
4290 case AMDGPU::V_MAC_LEGACY_F32_e64:
4291 case AMDGPU::V_FMAC_F32_e64:
4292 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4293 case AMDGPU::V_FMAC_F64_e64:
4295 case AMDGPU::V_MAC_F16_e32:
4296 case AMDGPU::V_FMAC_F16_e32:
4297 case AMDGPU::V_MAC_F32_e32:
4298 case AMDGPU::V_MAC_LEGACY_F32_e32:
4299 case AMDGPU::V_FMAC_F32_e32:
4300 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4301 case AMDGPU::V_FMAC_F64_e32: {
4302 int Src0Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
4303 AMDGPU::OpName::src0);
4304 const MachineOperand *Src0 = &
MI.getOperand(Src0Idx);
4315 MachineInstrBuilder MIB;
4318 const MachineOperand *Src0Mods =
4321 const MachineOperand *Src1Mods =
4324 const MachineOperand *Src2Mods =
4330 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
4331 (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
4333 (ST.getConstantBusLimit(
Opc) > 1 || !Src0->
isReg() ||
4335 MachineInstr *
DefMI;
4371 MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
4387 if (Src0Literal && !ST.hasVOP3Literal())
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);
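// The returns above feed two separate queries: one treats anything that
// changes wave-wide execution state (terminators, INLINEASM_BR, writes to
// EXEC, S_SETREG*, S_SETPRIO*, and SCHED_BARRIER with a zero mask) as a
// scheduling boundary, the other groups DS_ORDERED_COUNT, the
// DS_*_GS_REG_RTN opcodes and GWS instructions, which are handled as
// ordered/GDS-style accesses.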
  if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS) {
      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
                        *MD, AMDGPUAS::PRIVATE_ADDRESS);

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  if (ST.isTgSplitEnabled())

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))

    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
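// Taken together, the opcode tests above conservatively flag instructions
// whose effects are visible even with no active lanes (messages, exports,
// traps, ordered DS ops) or that communicate across lanes (readlane /
// writelane and the SGPR-spill-to-VGPR pseudos), plus calls and inline asm,
// which could contain any of these.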
  switch (Imm.getBitWidth()) {
                                        ST.hasInv2PiInlineImm());
                                        ST.hasInv2PiInlineImm());
    return ST.has16BitInsts() &&
                                        ST.hasInv2PiInlineImm());

  APInt IntImm = Imm.bitcastToAPInt();
  bool HasInv2Pi = ST.hasInv2PiInlineImm();

    return ST.has16BitInsts() &&
    return ST.has16BitInsts() &&

  switch (OperandType) {
    int32_t Trunc = static_cast<int32_t>(Imm);
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&
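// Hardware rule being checked here: an operand folds as an *inline* constant
// only if it is one of the values the ISA encodes directly (small integers
// -16..64, +-0.5, +-1.0, +-2.0, +-4.0, and 1/(2*pi) on targets with
// hasInv2PiInlineImm()); anything else must be emitted as a literal.
// For example, 0.5f is inline but 0.1f needs a 32-bit literal dword.
// The 16-bit variants additionally require ST.has16BitInsts().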
  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))

  return ST.hasVOP3Literal();

                                      int64_t ImmVal) const {
  if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
      OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
                                                   AMDGPU::OpName::src2))

  return RI.opCanUseInlineConstant(OpInfo.OperandType);

         "unexpected imm-like operand kind");

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

                                   AMDGPU::OpName OpName) const {
  return Mods && Mods->getImm();
  switch (MI.getOpcode()) {
  default:
    return false;
  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_CNDMASK_B32_e64:
    if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||

      (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
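// Shrinking e64 -> e32 is only possible when the VOP3-only features are
// unused: no output/input modifiers, src1 in a VGPR, and any carry defs/uses
// living in VCC (or VCC_LO in wave32), since the e32 encodings use VCC
// implicitly. That is what the operand checks in this switch verify.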
                                           unsigned Op32) const {
      Inst32.add(MI.getOperand(I));

  int Idx = MI.getNumExplicitDefs();

  int OpTy = MI.getDesc().operands()[Idx++].OperandType;

  if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {

  if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)

  return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;

  return AMDGPU::SReg_32RegClass.contains(Reg) ||
         AMDGPU::SReg_64RegClass.contains(Reg);

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
    ErrInfo = "illegal copy from vector register to SGPR";

  if (!MRI.isSSA() && MI.isCopy())
    return verifyCopy(MI, MRI, ErrInfo);
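// verifyInstruction: machine-verifier hook. Each check below either passes
// through or fills ErrInfo with a diagnostic and returns false; the checks
// run in roughly increasing specificity (operand counts and register classes
// first, then encoding-family specific rules such as SDWA, DPP, MIMG and the
// GFX90A AGPR constraints).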
  if (SIInstrInfo::isGenericOpcode(Opcode))

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  if (Src0Idx == -1) {
    Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
    Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
    Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
    Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);

  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";

  if (MI.isInlineAsm()) {
      if (!Reg.isVirtual() && !RC->contains(Reg)) {
        ErrInfo = "inlineasm operand has incorrect register class.";

  if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from image instruction.";

  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";

    int16_t RegClass = getOpRegClassID(OpInfo);
    switch (OpInfo.OperandType) {
      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";

        ErrInfo = "Illegal immediate value for operand.";

        ErrInfo = "Expected inline constant for operand.";

      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";

    if (OpInfo.isGenericType())

    if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
      if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
            RI.getSubRegisterClass(RC, MO.getSubReg())) {
          RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());

      if (!RC || !RI.isProperlyAlignedRC(*RC)) {
        ErrInfo = "Subtarget requires even aligned vector registers";

    if (RegClass != -1) {
      if (Reg.isVirtual())
        ErrInfo = "Operand has incorrect register class.";
    if (!ST.hasSDWA()) {
      ErrInfo = "SDWA is not supported on this target";

    for (auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
                    AMDGPU::OpName::dst_sel}) {
        int64_t Imm = MO->getImm();
          ErrInfo = "Invalid SDWA selection";

    int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);

    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
      if (!ST.hasSDWAScalar()) {
        if (!MO.isReg() ||
            !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
          ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";

              "Only reg allowed as operands in SDWA instructions on GFX9+";

    if (!ST.hasSDWAOmod()) {
      if (OMod != nullptr &&
        ErrInfo = "OMod not allowed in SDWA instructions on VI";

    if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
      unsigned Mods = Src0ModsMO->getImm();
        ErrInfo = "sext, abs and neg are not allowed on this instruction";

    if (isVOPC(BasicOpcode)) {
      if (!ST.hasSDWASdst() && DstIdx != -1) {
        if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
          ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
      } else if (!ST.hasSDWAOutModsVOPC()) {
        if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
          ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";

        if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
          ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";

    if (DstUnused && DstUnused->isImm() &&
      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";

          MI.getOperand(MI.findTiedOperandIdx(DstIdx));
            "Dst register should be tied to implicit use of preserved register";

        ErrInfo = "Dst register should use same physical register as preserved";

  if (isImage(Opcode) && !MI.mayStore()) {

    if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
        AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);

      uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
      if (RegCount > DstSize) {
        ErrInfo = "Image instruction returns too many registers for dst "

  if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
    unsigned ConstantBusCount = 0;
    bool UsesLiteral = false;

    int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
      LiteralVal = &MI.getOperand(ImmIdx);

    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
        } else if (!MO.isFI()) {
            ErrInfo = "VOP2/VOP3 instruction uses more than one literal";

          if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
                return !RI.regsOverlap(SGPRUsed, SGPR);

    if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
        Opcode != AMDGPU::V_WRITELANE_B32) {
      ErrInfo = "VOP* instruction violates constant bus restriction";
    if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) {
      ErrInfo = "VOP3 instruction uses literal";

  if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
    unsigned SGPRCount = 0;

    for (int OpIdx : {Src0Idx, Src1Idx}) {
        if (MO.getReg() != SGPRUsed)

    if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
      ErrInfo = "WRITELANE instruction violates constant bus restriction";

  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
      ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";

      ErrInfo = "ABS not allowed in VOP3B instructions";

      ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";

  if (Desc.isBranch()) {
      ErrInfo = "invalid branch target for SOPK instruction";

      ErrInfo = "invalid immediate for SOPK instruction";

      ErrInfo = "invalid immediate for SOPK instruction";
  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;

    const unsigned StaticNumOps =
        Desc.getNumOperands() + Desc.implicit_uses().size();
    const unsigned NumImplicitOps = IsDst ? 2 : 1;

    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";

      if (!Dst->isUse()) {
        ErrInfo = "v_movreld_b32 vdst should be a use operand";

      if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
          UseOpIdx != StaticNumOps + 1) {
        ErrInfo = "movrel implicit operands should be tied";

        = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
        !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
      ErrInfo = "src0 should be subreg of implicit vector use";

    if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      ErrInfo = "VALU instruction does not implicitly read exec mask";

  if (MI.mayStore() &&
    if (Soff && Soff->getReg() != AMDGPU::M0) {
      ErrInfo = "scalar stores must use m0 as offset register";

  if (isFLAT(MI) && !ST.hasFlatInstOffsets()) {
    if (Offset->getImm() != 0) {
      ErrInfo = "subtarget does not support offsets in flat instructions";

  if (isDS(MI) && !ST.hasGDS()) {
    if (GDSOp && GDSOp->getImm() != 0) {
      ErrInfo = "GDS is not supported on this subtarget";
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
                                               AMDGPU::OpName::vaddr0);
    AMDGPU::OpName RSrcOpName =
        isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
        ErrInfo = "dim is out of range";

      if (ST.hasR128A16()) {
        IsA16 = R128A16->getImm() != 0;
      } else if (ST.hasA16()) {
        IsA16 = A16->getImm() != 0;

      bool IsNSA = RsrcIdx - VAddr0Idx > 1;

      unsigned AddrWords =

      unsigned VAddrWords;
        VAddrWords = RsrcIdx - VAddr0Idx;
        if (ST.hasPartialNSAEncoding() &&
          unsigned LastVAddrIdx = RsrcIdx - 1;
          VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;

      if (VAddrWords != AddrWords) {
                          << " but got " << VAddrWords << "\n");
        ErrInfo = "bad vaddr size";

    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";
    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";

    if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";

    if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
      if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
          DC <= DppCtrl::ROW_NEWBCAST_LAST && !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_newbroadcast/row_share is not supported before "

      if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_share and row_xmask are not supported before GFX10";

    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_newbcast";

    AMDGPU::OpName DataName =
        isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;

    if (ST.hasGFX90AInsts()) {
      if (Dst && Data && !Dst->isTied() && !Data->isTied() &&
          (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) {
        ErrInfo = "Invalid register class: "
                  "vdata and vdst should be both VGPR or AGPR";

      if (Data && Data2 &&
        ErrInfo = "Invalid register class: "
                  "both data operands should be VGPR or AGPR";

      if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
          (Data2 && RI.isAGPR(MRI, Data2->getReg()))) {
        ErrInfo = "Invalid register class: "
                  "agpr loads and stores not supported on this GPU";
  if (ST.needsAlignedVGPRs()) {
    const auto isAlignedReg = [&MI, &MRI, this](AMDGPU::OpName OpName) -> bool {
      if (Reg.isPhysical())
        return !(RI.getHWRegIndex(Reg) & 1);
      return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
             !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);

    if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
        Opcode == AMDGPU::DS_GWS_BARRIER) {
      if (!isAlignedReg(AMDGPU::OpName::data0)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for DS_GWS instructions";

      if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for vaddr operand of image instructions";

  if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
    if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
      ErrInfo = "Invalid register class: "
                "v_accvgpr_write with an SGPR is not supported on this GPU";

  if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
      ErrInfo = "pseudo expects only physical SGPRs";

    if (!ST.hasScaleOffset()) {
      ErrInfo = "Subtarget does not support offset scaling";

      ErrInfo = "Instruction does not support offset scaling";

  for (unsigned I = 0; I < 3; ++I) {

  if (ST.hasFlatScratchHiInB64InstHazard() && isSALU(MI) &&
      MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI, nullptr)) {
    if ((Dst && RI.getRegClassForReg(MRI, Dst->getReg()) ==
                    &AMDGPU::SReg_64RegClass) ||
        Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
      ErrInfo = "Instruction cannot read flat_scratch_base_hi";
  switch (MI.getOpcode()) {
  default:
    return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::WQM: return AMDGPU::WQM;
  case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
  case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
  case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {
    return MI.getOperand(1).isReg() ||
           RI.isAGPR(MRI, MI.getOperand(0).getReg()) ?
           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  }
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U64_PSEUDO:
    return AMDGPU::V_ADD_U64_PSEUDO;
  case AMDGPU::S_SUB_U64_PSEUDO:
    return AMDGPU::V_SUB_U64_PSEUDO;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64;
  case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64;
  case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64;
  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64;
  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64;
  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64;
  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64;
  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64;
  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64;
  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64;
  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
  case AMDGPU::S_CVT_F32_I32: return AMDGPU::V_CVT_F32_I32_e64;
  case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
  case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
  case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
  case AMDGPU::S_CVT_F32_F16:
  case AMDGPU::S_CVT_HI_F32_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
                                   : AMDGPU::V_CVT_F32_F16_fake16_e64;
  case AMDGPU::S_CVT_F16_F32:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
                                   : AMDGPU::V_CVT_F16_F32_fake16_e64;
  case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
  case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
  case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
  case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64;
  case AMDGPU::S_CEIL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
                                   : AMDGPU::V_CEIL_F16_fake16_e64;
  case AMDGPU::S_FLOOR_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
                                   : AMDGPU::V_FLOOR_F16_fake16_e64;
  case AMDGPU::S_TRUNC_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
                                   : AMDGPU::V_TRUNC_F16_fake16_e64;
  case AMDGPU::S_RNDNE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
                                   : AMDGPU::V_RNDNE_F16_fake16_e64;
  case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
  case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
  case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
  case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
  case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
  case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
  case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
  case AMDGPU::S_ADD_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
                                   : AMDGPU::V_ADD_F16_fake16_e64;
  case AMDGPU::S_SUB_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
                                   : AMDGPU::V_SUB_F16_fake16_e64;
  case AMDGPU::S_MIN_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
                                   : AMDGPU::V_MIN_F16_fake16_e64;
  case AMDGPU::S_MAX_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
                                   : AMDGPU::V_MAX_F16_fake16_e64;
  case AMDGPU::S_MINIMUM_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
                                   : AMDGPU::V_MINIMUM_F16_fake16_e64;
  case AMDGPU::S_MAXIMUM_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
                                   : AMDGPU::V_MAXIMUM_F16_fake16_e64;
  case AMDGPU::S_MUL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
                                   : AMDGPU::V_MUL_F16_fake16_e64;
  case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
  case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
  case AMDGPU::S_FMAC_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
                                   : AMDGPU::V_FMAC_F16_fake16_e64;
  case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
  case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
  case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
  case AMDGPU::S_CMP_EQ_F32: return AMDGPU::V_CMP_EQ_F32_e64;
  case AMDGPU::S_CMP_LE_F32: return AMDGPU::V_CMP_LE_F32_e64;
  case AMDGPU::S_CMP_GT_F32: return AMDGPU::V_CMP_GT_F32_e64;
  case AMDGPU::S_CMP_LG_F32: return AMDGPU::V_CMP_LG_F32_e64;
  case AMDGPU::S_CMP_GE_F32: return AMDGPU::V_CMP_GE_F32_e64;
  case AMDGPU::S_CMP_O_F32: return AMDGPU::V_CMP_O_F32_e64;
  case AMDGPU::S_CMP_U_F32: return AMDGPU::V_CMP_U_F32_e64;
  case AMDGPU::S_CMP_NGE_F32: return AMDGPU::V_CMP_NGE_F32_e64;
  case AMDGPU::S_CMP_NLG_F32: return AMDGPU::V_CMP_NLG_F32_e64;
  case AMDGPU::S_CMP_NGT_F32: return AMDGPU::V_CMP_NGT_F32_e64;
  case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
  case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
  case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
  case AMDGPU::S_CMP_LT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
                                   : AMDGPU::V_CMP_LT_F16_fake16_e64;
  case AMDGPU::S_CMP_EQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
                                   : AMDGPU::V_CMP_EQ_F16_fake16_e64;
  case AMDGPU::S_CMP_LE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
                                   : AMDGPU::V_CMP_LE_F16_fake16_e64;
  case AMDGPU::S_CMP_GT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
                                   : AMDGPU::V_CMP_GT_F16_fake16_e64;
  case AMDGPU::S_CMP_LG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
                                   : AMDGPU::V_CMP_LG_F16_fake16_e64;
  case AMDGPU::S_CMP_GE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
                                   : AMDGPU::V_CMP_GE_F16_fake16_e64;
  case AMDGPU::S_CMP_O_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
                                   : AMDGPU::V_CMP_O_F16_fake16_e64;
  case AMDGPU::S_CMP_U_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
                                   : AMDGPU::V_CMP_U_F16_fake16_e64;
  case AMDGPU::S_CMP_NGE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
                                   : AMDGPU::V_CMP_NGE_F16_fake16_e64;
  case AMDGPU::S_CMP_NLG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
                                   : AMDGPU::V_CMP_NLG_F16_fake16_e64;
  case AMDGPU::S_CMP_NGT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
                                   : AMDGPU::V_CMP_NGT_F16_fake16_e64;
  case AMDGPU::S_CMP_NLE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
                                   : AMDGPU::V_CMP_NLE_F16_fake16_e64;
  case AMDGPU::S_CMP_NEQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
                                   : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
  case AMDGPU::S_CMP_NLT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
                                   : AMDGPU::V_CMP_NLT_F16_fake16_e64;
  case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
  case AMDGPU::V_S_EXP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
                                   : AMDGPU::V_EXP_F16_fake16_e64;
  case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
  case AMDGPU::V_S_LOG_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
                                   : AMDGPU::V_LOG_F16_fake16_e64;
  case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
  case AMDGPU::V_S_RCP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
                                   : AMDGPU::V_RCP_F16_fake16_e64;
  case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
  case AMDGPU::V_S_RSQ_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
                                   : AMDGPU::V_RSQ_F16_fake16_e64;
  case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
  case AMDGPU::V_S_SQRT_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
                                   : AMDGPU::V_SQRT_F16_fake16_e64;

         "Unexpected scalar opcode without corresponding vector one!");
6161 "Not a whole wave func");
6164 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6165 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6172 unsigned OpNo)
const {
6174 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6175 Desc.operands()[OpNo].RegClass == -1) {
6178 if (Reg.isVirtual()) {
6180 return MRI.getRegClass(Reg);
6182 return RI.getPhysRegBaseClass(Reg);
6185 int16_t RegClass = getOpRegClassID(
Desc.operands()[OpNo]);
6186 return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6194 unsigned RCID = getOpRegClassID(
get(
MI.getOpcode()).operands()[
OpIdx]);
6196 unsigned Size = RI.getRegSizeInBits(*RC);
6197 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6198 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6199 : AMDGPU::V_MOV_B32_e32;
6201 Opcode = AMDGPU::COPY;
6202 else if (RI.isSGPRClass(RC))
6203 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6217 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6223 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6234 if (SubIdx == AMDGPU::sub0)
6236 if (SubIdx == AMDGPU::sub1)
6248void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6264 if (Reg.isPhysical())
6274 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6277 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
6284 unsigned Opc =
MI.getOpcode();
6290 constexpr AMDGPU::OpName OpNames[] = {
6291 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6294 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6295 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6305 bool IsAGPR = RI.isAGPR(
MRI, MO.
getReg());
6306 if (IsAGPR && !ST.hasMAIInsts())
6308 if (IsAGPR && (!ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
6312 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6313 const int DataIdx = AMDGPU::getNamedOperandIdx(
6314 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6315 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6316 MI.getOperand(DataIdx).isReg() &&
6317 RI.isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6319 if ((
int)
OpIdx == DataIdx) {
6320 if (VDstIdx != -1 &&
6321 RI.isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6324 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6325 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6326 RI.isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6331 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6332 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6336 if (ST.hasFlatScratchHiInB64InstHazard() &&
6343 if (
Opc == AMDGPU::S_BITCMP0_B64 ||
Opc == AMDGPU::S_BITCMP1_B64)
6364 constexpr unsigned NumOps = 3;
6365 constexpr AMDGPU::OpName OpNames[
NumOps * 2] = {
6366 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6367 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6368 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6373 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6376 MO = &
MI.getOperand(SrcIdx);
6383 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
NumOps + SrcN]);
6387 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6391 return !OpSel && !OpSelHi;
6400 int64_t RegClass = getOpRegClassID(OpInfo);
6402 RegClass != -1 ? RI.getRegClass(RegClass) :
nullptr;
6411 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6412 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6416 if (!LiteralLimit--)
6426 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6434 if (--ConstantBusLimit <= 0)
6446 if (!LiteralLimit--)
6448 if (--ConstantBusLimit <= 0)
6454 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6458 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6460 !
Op.isIdenticalTo(*MO))
6470 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6484 bool Is64BitOp = Is64BitFPOp ||
6491 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6500 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6518 bool IsGFX950Only = ST.hasGFX950Insts();
6519 bool IsGFX940Only = ST.hasGFX940Insts();
6521 if (!IsGFX950Only && !IsGFX940Only)
6539 unsigned Opcode =
MI.getOpcode();
6541 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6542 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6543 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6544 case AMDGPU::V_MQSAD_U32_U8_e64:
6545 case AMDGPU::V_PK_ADD_F16:
6546 case AMDGPU::V_PK_ADD_F32:
6547 case AMDGPU::V_PK_ADD_I16:
6548 case AMDGPU::V_PK_ADD_U16:
6549 case AMDGPU::V_PK_ASHRREV_I16:
6550 case AMDGPU::V_PK_FMA_F16:
6551 case AMDGPU::V_PK_FMA_F32:
6552 case AMDGPU::V_PK_FMAC_F16_e32:
6553 case AMDGPU::V_PK_FMAC_F16_e64:
6554 case AMDGPU::V_PK_LSHLREV_B16:
6555 case AMDGPU::V_PK_LSHRREV_B16:
6556 case AMDGPU::V_PK_MAD_I16:
6557 case AMDGPU::V_PK_MAD_U16:
6558 case AMDGPU::V_PK_MAX_F16:
6559 case AMDGPU::V_PK_MAX_I16:
6560 case AMDGPU::V_PK_MAX_U16:
6561 case AMDGPU::V_PK_MIN_F16:
6562 case AMDGPU::V_PK_MIN_I16:
6563 case AMDGPU::V_PK_MIN_U16:
6564 case AMDGPU::V_PK_MOV_B32:
6565 case AMDGPU::V_PK_MUL_F16:
6566 case AMDGPU::V_PK_MUL_F32:
6567 case AMDGPU::V_PK_MUL_LO_U16:
6568 case AMDGPU::V_PK_SUB_I16:
6569 case AMDGPU::V_PK_SUB_U16:
6570 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6579 unsigned Opc =
MI.getOpcode();
6582 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6585 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6591 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6598 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6601 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6607 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6617 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6618 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6619 if (!RI.isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
6631 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6633 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6645 if (HasImplicitSGPR || !
MI.isCommutable()) {
6662 if (CommutedOpc == -1) {
6667 MI.setDesc(
get(CommutedOpc));
6671 bool Src0Kill = Src0.
isKill();
6675 else if (Src1.
isReg()) {
6690 unsigned Opc =
MI.getOpcode();
6693 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6694 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6695 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6698 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6699 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6700 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6701 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6702 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6703 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6704 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6708 if (Src1.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src1.
getReg()))) {
6709 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6714 if (VOP3Idx[2] != -1) {
6716 if (Src2.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src2.
getReg()))) {
6717 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6726 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6727 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6729 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6731 SGPRsUsed.
insert(SGPRReg);
6735 for (
int Idx : VOP3Idx) {
6744 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6756 if (!RI.isSGPRClass(RI.getRegClassForReg(
MRI, MO.
getReg())))
6763 if (ConstantBusLimit > 0) {
6775 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6776 !RI.isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6782 for (
unsigned I = 0;
I < 3; ++
I) {
6795 SRC = RI.getCommonSubClass(SRC, DstRC);
6798 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6800 if (RI.hasAGPRs(VRC)) {
6801 VRC = RI.getEquivalentVGPRClass(VRC);
6802 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6804 get(TargetOpcode::COPY), NewSrcReg)
6811 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6817 for (
unsigned i = 0; i < SubRegs; ++i) {
6818 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6820 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6821 .
addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
6827 get(AMDGPU::REG_SEQUENCE), DstReg);
6828 for (
unsigned i = 0; i < SubRegs; ++i) {
6830 MIB.
addImm(RI.getSubRegFromChannel(i));
6843 if (SBase && !RI.isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6845 SBase->setReg(SGPR);
6848 if (SOff && !RI.isSGPRReg(
MRI, SOff->
getReg())) {
6856 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6857 if (OldSAddrIdx < 0)
6873 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6874 if (NewVAddrIdx < 0)
6877 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6881 if (OldVAddrIdx >= 0) {
6883 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6895 if (OldVAddrIdx == NewVAddrIdx) {
6898 MRI.removeRegOperandFromUseList(&NewVAddr);
6899 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6903 MRI.removeRegOperandFromUseList(&NewVAddr);
6904 MRI.addRegOperandToUseList(&NewVAddr);
6906 assert(OldSAddrIdx == NewVAddrIdx);
6908 if (OldVAddrIdx >= 0) {
6909 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6910 AMDGPU::OpName::vdst_in);
6914 if (NewVDstIn != -1) {
6915 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
6921 if (NewVDstIn != -1) {
6922 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6943 if (!SAddr || RI.isSGPRClass(
MRI.getRegClass(SAddr->
getReg())))
6963 unsigned OpSubReg =
Op.getSubReg();
6966 RI.getRegClassForReg(
MRI, OpReg), OpSubReg);
6972 Register DstReg =
MRI.createVirtualRegister(DstRC);
6982 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6985 bool ImpDef = Def->isImplicitDef();
6986 while (!ImpDef && Def && Def->isCopy()) {
6987 if (Def->getOperand(1).getReg().isPhysical())
6989 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6990 ImpDef = Def && Def->isImplicitDef();
6992 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
  const auto *BoolXExecRC = TRI->getWaveMaskRegClass();

  unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
  unsigned NumSubRegs = RegSize / 32;
  Register VScalarOp = ScalarOp->getReg();

  if (NumSubRegs == 1) {
    Register CurReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

    BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)

    Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);

    BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)

      CondReg = NewCondReg;

      Register AndReg = MRI.createVirtualRegister(BoolXExecRC);

    ScalarOp->setReg(CurReg);
    ScalarOp->setIsKill();

    assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
           "Unhandled register size");

    for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
          .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx));

      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
          .addReg(VScalarOp, VScalarOpUndef,
                  TRI->getSubRegFromChannel(Idx + 1));

      Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)

      Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
      auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64),
      if (NumSubRegs <= 2)
        Cmp.addReg(VScalarOp);
        Cmp.addReg(VScalarOp, VScalarOpUndef,
                   TRI->getSubRegFromChannel(Idx, 2));

        CondReg = NewCondReg;

        Register AndReg = MRI.createVirtualRegister(BoolXExecRC);

    const auto *SScalarOpRC =
        TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp));
    Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC);
        BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
    unsigned Channel = 0;
    for (Register Piece : ReadlanePieces) {
      Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++));

    ScalarOp->setReg(SScalarOp);
    ScalarOp->setIsKill();

  Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
  MRI.setSimpleHint(SaveExec, CondReg);
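// This is the body of the classic "waterfall" loop: for a divergent value
// feeding an operand that must be scalar (e.g. the resource descriptor of a
// MUBUF/MIMG instruction), read the first active lane's value with
// V_READFIRSTLANE_B32 (32 bits at a time), compare it against the VGPR with
// V_CMP_EQ_*, accumulate the matching-lane mask, and let the surrounding
// loop save/mask EXEC over that mask so the instruction executes once per
// distinct value until every lane has been handled.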
7151 if (!Begin.isValid())
7153 if (!End.isValid()) {
7159 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7167 MBB.computeRegisterLiveness(
TRI, AMDGPU::SCC,
MI,
7168 std::numeric_limits<unsigned>::max()) !=
7171 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7177 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
7186 for (
auto I = Begin;
I != AfterMI;
I++) {
7187 for (
auto &MO :
I->all_uses())
7188 MRI.clearKillFlags(MO.getReg());
7213 MBB.addSuccessor(LoopBB);
7223 for (
auto &Succ : RemainderBB->
successors()) {
7247static std::tuple<unsigned, unsigned>
7255 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7256 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7259 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
7260 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7261 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7262 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
7263 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7280 .
addImm(AMDGPU::sub0_sub1)
7286 return std::tuple(RsrcPtr, NewSRsrc);
7323 if (
MI.getOpcode() == AMDGPU::PHI) {
7325 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
7326 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
7329 MRI.getRegClass(
MI.getOperand(i).getReg());
7330 if (RI.hasVectorRegisters(OpRC)) {
7344 VRC = &AMDGPU::VReg_1RegClass;
7347 ? RI.getEquivalentAGPRClass(SRC)
7348 : RI.getEquivalentVGPRClass(SRC);
7351 ? RI.getEquivalentAGPRClass(VRC)
7352 : RI.getEquivalentVGPRClass(VRC);
7360 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7362 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7378 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7381 if (RI.hasVGPRs(DstRC)) {
7385 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7387 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7405 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7410 if (DstRC != Src0RC) {
7419 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7421 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7427 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7428 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7429 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7430 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7431 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7432 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7433 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7435 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7448 ? AMDGPU::OpName::rsrc
7449 : AMDGPU::OpName::srsrc;
7451 if (SRsrc && !RI.isSGPRClass(
MRI.getRegClass(SRsrc->
getReg())))
7454 AMDGPU::OpName SampOpName =
7455 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7457 if (SSamp && !RI.isSGPRClass(
MRI.getRegClass(SSamp->
getReg())))
7464 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7466 if (!RI.isSGPRClass(
MRI.getRegClass(Dest->
getReg()))) {
7470 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7471 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7476 while (Start->getOpcode() != FrameSetupOpcode)
7479 while (End->getOpcode() != FrameDestroyOpcode)
7483 while (End !=
MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7484 MI.definesRegister(End->getOperand(1).getReg(),
nullptr))
7492 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7494 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7496 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7506 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
7507 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
7508 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
7509 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
7511 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7518 bool isSoffsetLegal =
true;
7520 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7521 if (SoffsetIdx != -1) {
7524 !RI.isSGPRClass(
MRI.getRegClass(Soffset->
getReg()))) {
7525 isSoffsetLegal =
false;
7529 bool isRsrcLegal =
true;
7531 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7532 if (RsrcIdx != -1) {
7535 isRsrcLegal =
false;
7539 if (isRsrcLegal && isSoffsetLegal)
7563 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7564 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7565 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7567 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7568 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
7569 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
7571 unsigned RsrcPtr, NewSRsrc;
7578 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7585 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7599 }
else if (!VAddr && ST.hasAddr64()) {
7603 "FIXME: Need to emit flat atomics here");
7605 unsigned RsrcPtr, NewSRsrc;
7608 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7631 MIB.
addImm(CPol->getImm());
7636 MIB.
addImm(TFE->getImm());
7656 MI.removeFromParent();
7661 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7663 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7667 if (!isSoffsetLegal) {
7679 if (!isSoffsetLegal) {
7691 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7692 if (RsrcIdx != -1) {
7693 DeferredList.insert(
MI);
7698 return DeferredList.contains(
MI);
7708 if (!ST.useRealTrue16Insts())
7711 unsigned Opcode =
MI.getOpcode();
7715 OpIdx >=
get(Opcode).getNumOperands() ||
7716 get(Opcode).operands()[
OpIdx].RegClass == -1)
7720 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7724 if (!RI.isVGPRClass(CurrRC))
7727 int16_t RCID = getOpRegClassID(
get(Opcode).operands()[
OpIdx]);
7729 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7730 Op.setSubReg(AMDGPU::lo16);
7731 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7733 Register NewDstReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7734 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7741 Op.setReg(NewDstReg);
7753 while (!Worklist.
empty()) {
7767 "Deferred MachineInstr are not supposed to re-populate worklist");
7787 case AMDGPU::S_ADD_I32:
7788 case AMDGPU::S_SUB_I32: {
7792 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7800 case AMDGPU::S_MUL_U64:
7801 if (ST.hasVectorMulU64()) {
7802 NewOpcode = AMDGPU::V_MUL_U64_e64;
7806 splitScalarSMulU64(Worklist, Inst, MDT);
7810 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7811 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7814 splitScalarSMulPseudo(Worklist, Inst, MDT);
7818 case AMDGPU::S_AND_B64:
7819 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7823 case AMDGPU::S_OR_B64:
7824 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7828 case AMDGPU::S_XOR_B64:
7829 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7833 case AMDGPU::S_NAND_B64:
7834 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7838 case AMDGPU::S_NOR_B64:
7839 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7843 case AMDGPU::S_XNOR_B64:
7844 if (ST.hasDLInsts())
7845 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7847 splitScalar64BitXnor(Worklist, Inst, MDT);
7851 case AMDGPU::S_ANDN2_B64:
7852 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7856 case AMDGPU::S_ORN2_B64:
7857 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7861 case AMDGPU::S_BREV_B64:
7862 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7866 case AMDGPU::S_NOT_B64:
7867 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
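// The S_*_B64 bitwise cases in this switch have no 64-bit VALU equivalent,
// so they are split into two 32-bit VALU operations over the sub0/sub1
// halves (the splitScalar64Bit* helpers); BCNT/BFE/FF1/FLBIT get dedicated
// splitters because their half-results have to be combined afterwards.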
7871 case AMDGPU::S_BCNT1_I32_B64:
7872 splitScalar64BitBCNT(Worklist, Inst);
7876 case AMDGPU::S_BFE_I64:
7877 splitScalar64BitBFE(Worklist, Inst);
7881 case AMDGPU::S_FLBIT_I32_B64:
7882 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7885 case AMDGPU::S_FF1_I32_B64:
7886 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7890 case AMDGPU::S_LSHL_B32:
7891 if (ST.hasOnlyRevVALUShifts()) {
7892 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7896 case AMDGPU::S_ASHR_I32:
7897 if (ST.hasOnlyRevVALUShifts()) {
7898 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7902 case AMDGPU::S_LSHR_B32:
7903 if (ST.hasOnlyRevVALUShifts()) {
7904 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7908 case AMDGPU::S_LSHL_B64:
7909 if (ST.hasOnlyRevVALUShifts()) {
7911 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7912 : AMDGPU::V_LSHLREV_B64_e64;
7916 case AMDGPU::S_ASHR_I64:
7917 if (ST.hasOnlyRevVALUShifts()) {
7918 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7922 case AMDGPU::S_LSHR_B64:
7923 if (ST.hasOnlyRevVALUShifts()) {
7924 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7929 case AMDGPU::S_ABS_I32:
7930 lowerScalarAbs(Worklist, Inst);
7934 case AMDGPU::S_ABSDIFF_I32:
7935 lowerScalarAbsDiff(Worklist, Inst);
7939 case AMDGPU::S_CBRANCH_SCC0:
7940 case AMDGPU::S_CBRANCH_SCC1: {
7943 bool IsSCC = CondReg == AMDGPU::SCC;
7951 case AMDGPU::S_BFE_U64:
7952 case AMDGPU::S_BFM_B64:
7955 case AMDGPU::S_PACK_LL_B32_B16:
7956 case AMDGPU::S_PACK_LH_B32_B16:
7957 case AMDGPU::S_PACK_HL_B32_B16:
7958 case AMDGPU::S_PACK_HH_B32_B16:
7959 movePackToVALU(Worklist,
MRI, Inst);
7963 case AMDGPU::S_XNOR_B32:
7964 lowerScalarXnor(Worklist, Inst);
7968 case AMDGPU::S_NAND_B32:
7969 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7973 case AMDGPU::S_NOR_B32:
7974 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7978 case AMDGPU::S_ANDN2_B32:
7979 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7983 case AMDGPU::S_ORN2_B32:
7984 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7992 case AMDGPU::S_ADD_CO_PSEUDO:
7993 case AMDGPU::S_SUB_CO_PSEUDO: {
7994 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7995 ? AMDGPU::V_ADDC_U32_e64
7996 : AMDGPU::V_SUBB_U32_e64;
7997 const auto *CarryRC = RI.getWaveMaskRegClass();
8000 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
8001 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
8008 Register DestReg =
MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
8019 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
8023 case AMDGPU::S_UADDO_PSEUDO:
8024 case AMDGPU::S_USUBO_PSEUDO: {
8030 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
8031 ? AMDGPU::V_ADD_CO_U32_e64
8032 : AMDGPU::V_SUB_CO_U32_e64;
8034 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest0.
getReg()));
8035 Register DestReg =
MRI.createVirtualRegister(NewRC);
8043 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
8044 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
8048 case AMDGPU::S_LSHL1_ADD_U32:
8049 case AMDGPU::S_LSHL2_ADD_U32:
8050 case AMDGPU::S_LSHL3_ADD_U32:
8051 case AMDGPU::S_LSHL4_ADD_U32: {
8055 unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1
8056 : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
8057 : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
8061 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest.
getReg()));
8062 Register DestReg =
MRI.createVirtualRegister(NewRC);
8070 MRI.replaceRegWith(Dest.
getReg(), DestReg);
8071 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
8075 case AMDGPU::S_CSELECT_B32:
8076 case AMDGPU::S_CSELECT_B64:
8077 lowerSelect(Worklist, Inst, MDT);
8080 case AMDGPU::S_CMP_EQ_I32:
8081 case AMDGPU::S_CMP_LG_I32:
8082 case AMDGPU::S_CMP_GT_I32:
8083 case AMDGPU::S_CMP_GE_I32:
8084 case AMDGPU::S_CMP_LT_I32:
8085 case AMDGPU::S_CMP_LE_I32:
8086 case AMDGPU::S_CMP_EQ_U32:
8087 case AMDGPU::S_CMP_LG_U32:
8088 case AMDGPU::S_CMP_GT_U32:
8089 case AMDGPU::S_CMP_GE_U32:
8090 case AMDGPU::S_CMP_LT_U32:
8091 case AMDGPU::S_CMP_LE_U32:
8092 case AMDGPU::S_CMP_EQ_U64:
8093 case AMDGPU::S_CMP_LG_U64:
8094 case AMDGPU::S_CMP_LT_F32:
8095 case AMDGPU::S_CMP_EQ_F32:
8096 case AMDGPU::S_CMP_LE_F32:
8097 case AMDGPU::S_CMP_GT_F32:
8098 case AMDGPU::S_CMP_LG_F32:
8099 case AMDGPU::S_CMP_GE_F32:
8100 case AMDGPU::S_CMP_O_F32:
8101 case AMDGPU::S_CMP_U_F32:
8102 case AMDGPU::S_CMP_NGE_F32:
8103 case AMDGPU::S_CMP_NLG_F32:
8104 case AMDGPU::S_CMP_NGT_F32:
8105 case AMDGPU::S_CMP_NLE_F32:
8106 case AMDGPU::S_CMP_NEQ_F32:
8107 case AMDGPU::S_CMP_NLT_F32: {
8108 Register CondReg =
MRI.createVirtualRegister(RI.getWaveMaskRegClass());
8112 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
8126 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8130 case AMDGPU::S_CMP_LT_F16:
8131 case AMDGPU::S_CMP_EQ_F16:
8132 case AMDGPU::S_CMP_LE_F16:
8133 case AMDGPU::S_CMP_GT_F16:
8134 case AMDGPU::S_CMP_LG_F16:
8135 case AMDGPU::S_CMP_GE_F16:
8136 case AMDGPU::S_CMP_O_F16:
8137 case AMDGPU::S_CMP_U_F16:
8138 case AMDGPU::S_CMP_NGE_F16:
8139 case AMDGPU::S_CMP_NLG_F16:
  case AMDGPU::S_CMP_NGT_F16:
  case AMDGPU::S_CMP_NLE_F16:
  case AMDGPU::S_CMP_NEQ_F16:
  case AMDGPU::S_CMP_NLT_F16: {
    Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
    addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
  case AMDGPU::S_CVT_HI_F32_F16: {
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    if (ST.useRealTrue16Insts()) {
          .addReg(TmpReg, 0, AMDGPU::hi16)
    addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
  case AMDGPU::S_MINIMUM_F32:
  case AMDGPU::S_MAXIMUM_F32: {
    Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
  case AMDGPU::S_MINIMUM_F16:
  case AMDGPU::S_MAXIMUM_F16: {
    Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
                                                    ? &AMDGPU::VGPR_16RegClass
                                                    : &AMDGPU::VGPR_32RegClass);
    addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
  case AMDGPU::V_S_EXP_F16_e64:
  case AMDGPU::V_S_LOG_F16_e64:
  case AMDGPU::V_S_RCP_F16_e64:
  case AMDGPU::V_S_RSQ_F16_e64:
  case AMDGPU::V_S_SQRT_F16_e64: {
    Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
                                                    ? &AMDGPU::VGPR_16RegClass
                                                    : &AMDGPU::VGPR_32RegClass);
    addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
  if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
    if (NewOpcode == Opcode) {
      Register NewDst = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
              get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
          RI.getCommonSubClass(NewDstRC, SrcRC)) {
        addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
        MRI.replaceRegWith(DstReg, NewDstReg);
        MRI.clearKillFlags(NewDstReg);
      if (!MRI.constrainRegClass(NewDstReg, CommonRC))
    if (ST.useRealTrue16Insts() && Inst.isCopy() &&
      if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
        Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
        Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
                get(AMDGPU::IMPLICIT_DEF), Undef);
                get(AMDGPU::REG_SEQUENCE), NewDstReg)
        MRI.replaceRegWith(DstReg, NewDstReg);
        addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
      } else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
        Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
        MRI.replaceRegWith(DstReg, NewDstReg);
        addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
      Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
      MRI.replaceRegWith(DstReg, NewDstReg);
      addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
  if (AMDGPU::getNamedOperandIdx(NewOpcode,
                                 AMDGPU::OpName::src0_modifiers) >= 0)
  NewInstr->addOperand(Src);
  if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
    unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
    NewInstr.addImm(Size);
  } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
  } else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
           "Scalar BFE is only implemented for constant width and offset");
  if (AMDGPU::getNamedOperandIdx(NewOpcode,
                                 AMDGPU::OpName::src1_modifiers) >= 0)
  if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
  if (AMDGPU::getNamedOperandIdx(NewOpcode,
                                 AMDGPU::OpName::src2_modifiers) >= 0)
  if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
  if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
  if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
  if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
  NewInstr->addOperand(Op);
  if (Op.getReg() == AMDGPU::SCC) {
    if (Op.isDef() && !Op.isDead())
      addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
    addSCCDefsToVALUWorklist(NewInstr, Worklist);
  if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
    Register DstReg = NewInstr->getOperand(0).getReg();
    NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);
  addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
std::pair<bool, MachineBasicBlock *>
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
  unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ? AMDGPU::V_ADD_U32_e64
                                             : AMDGPU::V_SUB_U32_e64;
  MRI.replaceRegWith(OldDstReg, ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  return std::pair(true, NewBB);
  return std::pair(false, nullptr);
  bool IsSCC = (CondReg == AMDGPU::SCC);
    MRI.replaceRegWith(Dest.getReg(), CondReg);
    const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
    NewCondReg = MRI.createVirtualRegister(TC);
    bool CopyFound = false;
    for (MachineInstr &CandI :
      if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) !=
        if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
              .addReg(CandI.getOperand(1).getReg());
        ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
      RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg())));
  MachineInstr *NewInst;
  if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) {
    NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
  MRI.replaceRegWith(Dest.getReg(), NewDestReg);
  addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned SubOp = ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e32
                                      : AMDGPU::V_SUB_CO_U32_e32;
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  Register SubResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  if (ST.hasDLInsts()) {
    Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    MRI.replaceRegWith(Dest.getReg(), NewDest);
    addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
    bool Src0IsSGPR = Src0.isReg() &&
                      RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
    bool Src1IsSGPR = Src1.isReg() &&
                      RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
    Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    } else if (Src1IsSGPR) {
    MRI.replaceRegWith(Dest.getReg(), NewDest);
    addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
                                      unsigned Opcode) const {
  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MRI.replaceRegWith(Dest.getReg(), NewDest);
  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
                                      unsigned Opcode) const {
  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  MRI.replaceRegWith(Dest.getReg(), NewDest);
  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg()
                                          ? MRI.getRegClass(Src0.getReg())
                                          : &AMDGPU::SGPR_32RegClass;
  const TargetRegisterClass *Src0SubRC =
      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
                                                  AMDGPU::sub0, Src0SubRC);
  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
  const TargetRegisterClass *NewDestSubRC =
      RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &LoHalf =
      *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
                                                  AMDGPU::sub1, Src0SubRC);
  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &HiHalf =
      *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
  Worklist.insert(&LoHalf);
  Worklist.insert(&HiHalf);
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
  Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
  Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
  const TargetRegisterClass *Src0SubRC =
      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
  if (RI.isSGPRClass(Src0SubRC))
    Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
  const TargetRegisterClass *Src1SubRC =
      RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
  if (RI.isSGPRClass(Src1SubRC))
    Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
  MachineOperand Op0L =
  MachineOperand Op1L =
  MachineOperand Op0H =
  MachineOperand Op1H =
  Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *Op1L_Op0H =
  Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *Op1H_Op0L =
  Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *Carry =
  MachineInstr *LoHalf =
  Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *HiHalf =
  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
  Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
  Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
  const TargetRegisterClass *Src0SubRC =
      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
  if (RI.isSGPRClass(Src0SubRC))
    Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
  const TargetRegisterClass *Src1SubRC =
      RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
  if (RI.isSGPRClass(Src1SubRC))
    Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
  MachineOperand Op0L =
  MachineOperand Op1L =
  unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
                        ? AMDGPU::V_MUL_HI_U32_e64
                        : AMDGPU::V_MUL_HI_I32_e64;
  MachineInstr *HiHalf =
  MachineInstr *LoHalf =
  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg()
                                          ? MRI.getRegClass(Src0.getReg())
                                          : &AMDGPU::SGPR_32RegClass;
  const TargetRegisterClass *Src0SubRC =
      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
  const TargetRegisterClass *Src1RC = Src1.isReg()
                                          ? MRI.getRegClass(Src1.getReg())
                                          : &AMDGPU::SGPR_32RegClass;
  const TargetRegisterClass *Src1SubRC =
      RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
                                                  AMDGPU::sub0, Src0SubRC);
                                                  AMDGPU::sub0, Src1SubRC);
                                                  AMDGPU::sub1, Src0SubRC);
                                                  AMDGPU::sub1, Src1SubRC);
  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
  const TargetRegisterClass *NewDestSubRC =
      RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
  Worklist.insert(&LoHalf);
  Worklist.insert(&HiHalf);
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  MachineOperand *Op0;
  MachineOperand *Op1;
  Register NewDest = MRI.createVirtualRegister(DestRC);
  MRI.replaceRegWith(Dest.getReg(), NewDest);
  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
  const TargetRegisterClass *SrcRC =
      Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
  Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  const TargetRegisterClass *SrcSubRC =
      RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
                                                 AMDGPU::sub0, SrcSubRC);
                                                 AMDGPU::sub1, SrcSubRC);
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
         Offset == 0 && "Not implemented");
  Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
      .addReg(Src.getReg(), 0, AMDGPU::sub0);
      .addReg(Src.getReg(), 0, AMDGPU::sub0)
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  const MCInstrDesc &InstDesc = get(Opcode);
  bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
  unsigned OpcodeAdd =
      ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  const TargetRegisterClass *SrcRC =
      Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
  const TargetRegisterClass *SrcSubRC =
      RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
  MachineOperand SrcRegSub0 =
  MachineOperand SrcRegSub1 =
  Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      .addReg(IsCtlz ? MidReg1 : MidReg2)
      .addReg(IsCtlz ? MidReg2 : MidReg1);
  MRI.replaceRegWith(Dest.getReg(), MidReg4);
  addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
void SIInstrInfo::addUsersToMoveToVALUWorklist(
    MachineInstr &UseMI = *MO.getParent();
    switch (UseMI.getOpcode()) {
    case AMDGPU::SOFT_WQM:
    case AMDGPU::STRICT_WWM:
    case AMDGPU::STRICT_WQM:
    case AMDGPU::REG_SEQUENCE:
    case AMDGPU::INSERT_SUBREG:
      OpNo = MO.getOperandNo();
      MRI.constrainRegClass(DstReg, OpRC);
      if (!RI.hasVectorRegisters(OpRC))
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  if (ST.useRealTrue16Insts()) {
      SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    bool isSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
    bool isSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
    auto NewMI = BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), ResultReg);
    case AMDGPU::S_PACK_LL_B32_B16:
                   isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
          .addImm(AMDGPU::lo16)
                   isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
          .addImm(AMDGPU::hi16);
    case AMDGPU::S_PACK_LH_B32_B16:
                   isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
          .addImm(AMDGPU::lo16)
          .addReg(SrcReg1, 0, AMDGPU::hi16)
          .addImm(AMDGPU::hi16);
    case AMDGPU::S_PACK_HL_B32_B16:
      NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
          .addImm(AMDGPU::lo16)
                  isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
          .addImm(AMDGPU::hi16);
    case AMDGPU::S_PACK_HH_B32_B16:
      NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
          .addImm(AMDGPU::lo16)
          .addReg(SrcReg1, 0, AMDGPU::hi16)
          .addImm(AMDGPU::hi16);
    MRI.replaceRegWith(Dest.getReg(), ResultReg);
    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  case AMDGPU::S_PACK_LL_B32_B16: {
    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  case AMDGPU::S_PACK_LH_B32_B16: {
    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  case AMDGPU::S_PACK_HL_B32_B16: {
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  case AMDGPU::S_PACK_HH_B32_B16: {
    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
         !Op.isDead() && Op.getParent() == &SCCDefInst);
  SmallVector<MachineInstr *, 4> CopyToDelete;
  for (MachineInstr &MI :
    int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
      MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
      Register DestReg = MI.getOperand(0).getReg();
      MRI.replaceRegWith(DestReg, NewCond);
      MI.getOperand(SCCIdx).setReg(NewCond);
    if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
  for (auto &Copy : CopyToDelete)
    Copy->eraseFromParent();
void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
  for (MachineInstr &MI :
    if (MI.modifiesRegister(AMDGPU::VCC, &RI))
    if (MI.definesRegister(AMDGPU::SCC, &RI)) {
  const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::INSERT_SUBREG:
  case AMDGPU::SOFT_WQM:
  case AMDGPU::STRICT_WWM:
  case AMDGPU::STRICT_WQM: {
    if (RI.isAGPRClass(SrcRC)) {
      if (RI.isAGPRClass(NewDstRC))
      case AMDGPU::REG_SEQUENCE:
      case AMDGPU::INSERT_SUBREG:
        NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
    if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
    NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
                                 int OpIndices[3]) const {
  const MCInstrDesc &Desc = MI.getDesc();
  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
  for (unsigned i = 0; i < 3; ++i) {
    int Idx = OpIndices[i];
    const MachineOperand &MO = MI.getOperand(Idx);
    const TargetRegisterClass *OpRC =
        RI.getRegClass(getOpRegClassID(Desc.operands()[Idx]));
    bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
    const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
    if (RI.isSGPRClass(RegRC))
  if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
    SGPRReg = UsedSGPRs[0];
  if (!SGPRReg && UsedSGPRs[1]) {
    if (UsedSGPRs[1] == UsedSGPRs[2])
      SGPRReg = UsedSGPRs[1];
                                           AMDGPU::OpName OperandName) const {
  if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
  return &MI.getOperand(Idx);
  if (ST.isAmdHsaOS()) {
    RsrcDataFormat |= (1ULL << 56);
    RsrcDataFormat |= (2ULL << 59);
  return RsrcDataFormat;
  uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
  uint64_t IndexStride = ST.isWave64() ? 3 : 2;
  Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
  unsigned Opc = MI.getOpcode();
  return get(Opc).mayLoad() &&
                                          int &FrameIndex) const {
  if (!Addr || !Addr->isFI())
                                         int &FrameIndex) const {
                                          int &FrameIndex) const {
                                           int &FrameIndex) const {
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
  unsigned Opc = MI.getOpcode();
  unsigned DescSize = Desc.getSize();
    unsigned Size = DescSize;
    if (MI.isBranch() && ST.hasOffset3fBug())
    bool HasLiteral = false;
    unsigned LiteralSize = 4;
    for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
        if (ST.has64BitLiterals()) {
          switch (OpInfo.OperandType) {
    return HasLiteral ? DescSize + LiteralSize : DescSize;
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
    return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
  case TargetOpcode::BUNDLE:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR: {
    const char *AsmStr = MI.getOperand(0).getSymbolName();
    if (MI.isMetaInstruction())
      const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
      unsigned LoInstOpcode = D16Info->LoOp;
      DescSize = Desc.getSize();
    if (Opc == AMDGPU::V_FMA_MIX_F16_t16 || Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
      DescSize = Desc.getSize();
  if (MI.memoperands_empty())
  static const std::pair<int, const char *> TargetIndices[] = {
std::pair<unsigned, unsigned>
  static const std::pair<unsigned, const char *> TargetFlags[] = {
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
    return AMDGPU::WWM_COPY;
  return AMDGPU::COPY;
  if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
  if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
    return IsLRSplitInst;
  bool IsNullOrVectorRegister = true;
    IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
  return IsNullOrVectorRegister &&
         (!MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
          MI.modifiesRegister(AMDGPU::EXEC, &RI)));
  if (ST.hasAddNoCarry())
  Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
  MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
  if (ST.hasAddNoCarry())
  Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
                             : RS.scavengeRegisterBackwards(
                                   *RI.getBoolRC(), I, false,
  case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
  case AMDGPU::SI_KILL_I1_TERMINATOR:
  case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
    return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
  case AMDGPU::SI_KILL_I1_PSEUDO:
    return get(AMDGPU::SI_KILL_I1_TERMINATOR);
  const unsigned OffsetBits =
  return (1 << OffsetBits) - 1;
  if (!ST.isWave32())
  if (MI.isInlineAsm())
  for (auto &Op : MI.implicit_operands()) {
    if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
      Op.setReg(AMDGPU::VCC_LO);
  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
  const int16_t RCID = getOpRegClassID(MI.getDesc().operands()[Idx]);
  return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      Overflow = Imm - MaxImm;
  if (Overflow > 0) {
    if (ST.hasRestrictedSOffset())
  SOffset = Overflow;
  if (!ST.hasFlatInstOffsets())
  if (ST.hasNegativeUnalignedScratchOffsetBug() &&
std::pair<int64_t, int64_t>
  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;
  if (AllowNegative) {
    int64_t D = 1LL << NumBits;
    RemainderOffset = (COffsetVal / D) * D;
    ImmField = COffsetVal - RemainderOffset;
    if (ST.hasNegativeUnalignedScratchOffsetBug() &&
        (ImmField % 4) != 0) {
      RemainderOffset += ImmField % 4;
      ImmField -= ImmField % 4;
  } else if (COffsetVal >= 0) {
    RemainderOffset = COffsetVal - ImmField;
  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
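// Illustrative sketch (not part of the original file): the split above
// divides a constant offset into an immediate field that fits the FLAT
// offset encoding and a remainder to be added separately, preserving
// ImmField + RemainderOffset == COffsetVal. A standalone model of the signed
// (AllowNegative) case, with NumBits naming the magnitude width of the
// immediate field (hypothetical helper, <cstdint> assumed):
static void sketchSplitSignedOffset(int64_t COffsetVal, unsigned NumBits,
                                    int64_t &ImmField,
                                    int64_t &RemainderOffset) {
  // Signed division by a power of two truncates towards zero, so the
  // immediate keeps the sign of the original offset and the remainder is a
  // multiple of 2^NumBits.
  int64_t D = 1LL << NumBits;
  RemainderOffset = (COffsetVal / D) * D;
  ImmField = COffsetVal - RemainderOffset;
  // Same invariant the original code asserts:
  // RemainderOffset + ImmField == COffsetVal.
}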
  if (ST.hasNegativeScratchOffsetBug() &&
  switch (ST.getGeneration()) {
  case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
  case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)                                    \
  case OPCODE##_dpp:                                                           \
  case OPCODE##_e32:                                                           \
  case OPCODE##_e64:                                                           \
  case OPCODE##_e64_dpp:                                                       \
  case OPCODE##_sdwa:
  case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
  case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
  case AMDGPU::V_FMA_F16_gfx9_e64:
  case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
  case AMDGPU::V_INTERP_P2_F16:
  case AMDGPU::V_MAD_F16_e64:
  case AMDGPU::V_MAD_U16_e64:
  case AMDGPU::V_MAD_I16_e64:
         "SIInsertWaitcnts should have promoted soft waitcnt instructions!");
  switch (ST.getGeneration()) {
  if (isMAI(Opcode)) {
    if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
  if (ST.hasGFX90AInsts()) {
    if (ST.hasGFX940Insts())
  for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
    if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
      auto &RegOp = MI.getOperand(1 + 2 * I);
  switch (MI.getOpcode()) {
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::INSERT_SUBREG:
    if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
  if (!P.Reg.isVirtual())
  auto *DefInst = MRI.getVRegDef(RSR.Reg);
  while (auto *MI = DefInst) {
    switch (MI->getOpcode()) {
    case AMDGPU::V_MOV_B32_e32: {
      auto &Op1 = MI->getOperand(1);
      DefInst = MRI.getVRegDef(RSR.Reg);
      DefInst = MRI.getVRegDef(RSR.Reg);
  assert(MRI.isSSA() && "Must be run on SSA");
  auto *TRI = MRI.getTargetRegisterInfo();
  auto *DefBB = DefMI.getParent();
  if (UseMI.getParent() != DefBB)
  const int MaxInstScan = 20;
  auto E = UseMI.getIterator();
  for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
    if (I->isDebugInstr())
    if (++NumInst > MaxInstScan)
    if (I->modifiesRegister(AMDGPU::EXEC, TRI))
  assert(MRI.isSSA() && "Must be run on SSA");
  auto *TRI = MRI.getTargetRegisterInfo();
  auto *DefBB = DefMI.getParent();
  const int MaxUseScan = 10;
  for (auto &Use : MRI.use_nodbg_operands(VReg)) {
    auto &UseInst = *Use.getParent();
    if (UseInst.getParent() != DefBB || UseInst.isPHI())
    if (++NumUse > MaxUseScan)
  const int MaxInstScan = 20;
  for (auto I = std::next(DefMI.getIterator()); ; ++I) {
    if (I->isDebugInstr())
    if (++NumInst > MaxInstScan)
      if (Reg == VReg && --NumUse == 0)
    } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
  auto Cur = MBB.begin();
  if (Cur != MBB.end())
      if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
    } while (Cur != MBB.end() && Cur != LastPHIIt);
  if (InsPt != MBB.end() &&
      (InsPt->getOpcode() == AMDGPU::SI_IF ||
       InsPt->getOpcode() == AMDGPU::SI_ELSE ||
       InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
      InsPt->definesRegister(Src, nullptr)) {
        .addReg(Src, 0, SrcSubReg)
  if (isFullCopyInstr(MI)) {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
      MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
      MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
                                           unsigned *PredCost) const {
  if (MI.isBundle()) {
    unsigned Lat = 0, Count = 0;
    for (++I; I != E && I->isBundledWithPred(); ++I) {
      Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
    return Lat + Count - 1;
  return SchedModel.computeInstrLatency(&MI);
  return *CallAddrOp;
  unsigned Opcode = MI.getOpcode();
                       : MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(Dst);
    LLT SrcTy = MRI.getType(Src);
    unsigned SrcAS = SrcTy.getAddressSpace();
        ST.hasGloballyAddressableScratch()
  if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
    return HandleAddrSpaceCast(MI);
    auto IID = GI->getIntrinsicID();
    case Intrinsic::amdgcn_addrspacecast_nonnull:
      return HandleAddrSpaceCast(MI);
    case Intrinsic::amdgcn_if:
    case Intrinsic::amdgcn_else:
  if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
      Opcode == AMDGPU::G_SEXTLOAD) {
    if (MI.memoperands_empty())
      return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
             mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
  if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
      Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
      Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
  unsigned opcode = MI.getOpcode();
  if (opcode == AMDGPU::V_READLANE_B32 ||
      opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
  if (isCopyInstr(MI)) {
        RI.getPhysRegBaseClass(srcOp.getReg());
  if (MI.isPreISelOpcode())
  if (MI.memoperands_empty())
    return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
           mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
    if (!SrcOp.isReg())
    if (!Reg || !SrcOp.readsReg())
    if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
      F, "ds_ordered_count unsupported for this calling conv"));
                                 Register &SrcReg2, int64_t &CmpMask,
                                 int64_t &CmpValue) const {
  if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
  switch (MI.getOpcode()) {
  case AMDGPU::S_CMP_EQ_U32:
  case AMDGPU::S_CMP_EQ_I32:
  case AMDGPU::S_CMP_LG_U32:
  case AMDGPU::S_CMP_LG_I32:
  case AMDGPU::S_CMP_LT_U32:
  case AMDGPU::S_CMP_LT_I32:
  case AMDGPU::S_CMP_GT_U32:
  case AMDGPU::S_CMP_GT_I32:
  case AMDGPU::S_CMP_LE_U32:
  case AMDGPU::S_CMP_LE_I32:
  case AMDGPU::S_CMP_GE_U32:
  case AMDGPU::S_CMP_GE_I32:
  case AMDGPU::S_CMP_EQ_U64:
  case AMDGPU::S_CMP_LG_U64:
    SrcReg = MI.getOperand(0).getReg();
    if (MI.getOperand(1).isReg()) {
      if (MI.getOperand(1).getSubReg())
      SrcReg2 = MI.getOperand(1).getReg();
    } else if (MI.getOperand(1).isImm()) {
      CmpValue = MI.getOperand(1).getImm();
  case AMDGPU::S_CMPK_EQ_U32:
  case AMDGPU::S_CMPK_EQ_I32:
  case AMDGPU::S_CMPK_LG_U32:
  case AMDGPU::S_CMPK_LG_I32:
  case AMDGPU::S_CMPK_LT_U32:
  case AMDGPU::S_CMPK_LT_I32:
  case AMDGPU::S_CMPK_GT_U32:
  case AMDGPU::S_CMPK_GT_I32:
  case AMDGPU::S_CMPK_LE_U32:
  case AMDGPU::S_CMPK_LE_I32:
  case AMDGPU::S_CMPK_GE_U32:
  case AMDGPU::S_CMPK_GE_I32:
    SrcReg = MI.getOperand(0).getReg();
    CmpValue = MI.getOperand(1).getImm();
    if (S->isLiveIn(AMDGPU::SCC))
bool SIInstrInfo::invertSCCUse(MachineInstr *SCCDef) const {
  bool SCCIsDead = false;
  constexpr unsigned ScanLimit = 12;
  unsigned Count = 0;
  for (MachineInstr &MI :
    if (++Count > ScanLimit)
    if (MI.readsRegister(AMDGPU::SCC, &RI)) {
      if (MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
          MI.getOpcode() == AMDGPU::S_CSELECT_B64 ||
          MI.getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
          MI.getOpcode() == AMDGPU::S_CBRANCH_SCC1)
    if (MI.definesRegister(AMDGPU::SCC, &RI)) {
  for (MachineInstr *MI : InvertInstr) {
    if (MI->getOpcode() == AMDGPU::S_CSELECT_B32 ||
        MI->getOpcode() == AMDGPU::S_CSELECT_B64) {
    } else if (MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
               MI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) {
      MI->setDesc(get(MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0
                          ? AMDGPU::S_CBRANCH_SCC1
                          : AMDGPU::S_CBRANCH_SCC0));
                               bool NeedInversion) const {
  MachineInstr *KillsSCC = nullptr;
    if (MI.modifiesRegister(AMDGPU::SCC, &RI))
    if (MI.killsRegister(AMDGPU::SCC, &RI))
  if (NeedInversion && !invertSCCUse(SCCRedefine))
  if (MachineOperand *SccDef =
    SccDef->setIsDead(false);
  if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
      Def.getOpcode() != AMDGPU::S_CSELECT_B64)
  bool Op1IsNonZeroImm =
      Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
  bool Op2IsZeroImm =
      Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
  if (!Op1IsNonZeroImm || !Op2IsZeroImm)
                                       Register SrcReg2, int64_t CmpMask,
  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
                                  this](bool NeedInversion) -> bool {
    if (!optimizeSCC(Def, &CmpInstr, NeedInversion))
    if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
        MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
      if (Def1 && Def1->getOpcode() == AMDGPU::COPY && Def2 &&
        optimizeSCC(Select, Def, false);
  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
                               this](int64_t ExpectedValue, unsigned SrcSize,
                                     bool IsReversible, bool IsSigned) -> bool {
    if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
        Def->getOpcode() != AMDGPU::S_AND_B64)
    const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
      SrcOp = &Def->getOperand(2);
    else if (isMask(&Def->getOperand(2)))
      SrcOp = &Def->getOperand(1);
    if (IsSigned && BitNo == SrcSize - 1)
    ExpectedValue <<= BitNo;
    bool IsReversedCC = false;
    if (CmpValue != ExpectedValue) {
      IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
    Register DefReg = Def->getOperand(0).getReg();
    if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
    if (!optimizeSCC(Def, &CmpInstr, false))
    if (!MRI->use_nodbg_empty(DefReg)) {
    unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
                                                     : AMDGPU::S_BITCMP1_B32
                                      : IsReversedCC ? AMDGPU::S_BITCMP0_B64
                                                     : AMDGPU::S_BITCMP1_B64;
    Def->eraseFromParent();
  case AMDGPU::S_CMP_EQ_U32:
  case AMDGPU::S_CMP_EQ_I32:
  case AMDGPU::S_CMPK_EQ_U32:
  case AMDGPU::S_CMPK_EQ_I32:
    return optimizeCmpAnd(1, 32, true, false) || optimizeCmpSelect(true);
  case AMDGPU::S_CMP_GE_U32:
  case AMDGPU::S_CMPK_GE_U32:
    return optimizeCmpAnd(1, 32, false, false);
  case AMDGPU::S_CMP_GE_I32:
  case AMDGPU::S_CMPK_GE_I32:
    return optimizeCmpAnd(1, 32, false, true);
  case AMDGPU::S_CMP_EQ_U64:
    return optimizeCmpAnd(1, 64, true, false);
  case AMDGPU::S_CMP_LG_U32:
  case AMDGPU::S_CMP_LG_I32:
  case AMDGPU::S_CMPK_LG_U32:
  case AMDGPU::S_CMPK_LG_I32:
    return optimizeCmpAnd(0, 32, true, false) || optimizeCmpSelect(false);
  case AMDGPU::S_CMP_GT_U32:
  case AMDGPU::S_CMPK_GT_U32:
    return optimizeCmpAnd(0, 32, false, false);
  case AMDGPU::S_CMP_GT_I32:
  case AMDGPU::S_CMPK_GT_I32:
    return optimizeCmpAnd(0, 32, false, true);
  case AMDGPU::S_CMP_LG_U64:
    return optimizeCmpAnd(0, 64, true, false) || optimizeCmpSelect(false);
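// Illustrative sketch (not part of the original file): optimizeCmpAnd above
// matches "SCC = ((X & Mask) == CmpValue)" where Mask has a single bit set
// and CmpValue is either 0 or Mask, and rewrites it as one S_BITCMP0/1 on
// that bit; comparing against the "other" constant is the reversed condition,
// detected via CmpValue == (ExpectedValue ^ Mask). A standalone model of the
// predicate being formed (hypothetical helper, <cstdint> assumed):
static bool sketchBitTest(uint64_t X, unsigned BitNo, bool ExpectSet) {
  uint64_t Mask = 1ull << BitNo;
  // S_BITCMP1 sets SCC when the bit is set, S_BITCMP0 when it is clear.
  return ExpectSet ? (X & Mask) != 0 : (X & Mask) == 0;
}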
                                               AMDGPU::OpName OpName) const {
  if (!ST.needsAlignedVGPRs())
  int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
  bool IsAGPR = RI.isAGPR(MRI, DataReg);
      IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
      MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
                                       : &AMDGPU::VReg_64_Align2RegClass);
      .addReg(DataReg, 0, Op.getSubReg())
  Op.setSubReg(AMDGPU::sub0);
  unsigned Opcode = MI.getOpcode();
      Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
      Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
  if (!ST.hasGFX940Insts())
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool isSCCDeadOnExit(MachineBasicBlock *MBB)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasAddNoCarry() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
bool canAddToBBProlog(const MachineInstr &MI) const
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instruction's opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const final
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIdx operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
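For example, a caller folding a global-memory offset might use it like this (a minimal sketch; TII, COffsetVal and the result names are assumed to be in scope or illustrative):
  // Split a byte offset for a global FLAT access into the part that fits the
  // instruction's immediate field and the part that must be added separately.
  auto [ImmField, RemainderOffset] =
      TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
                           SIInstrFlags::FlatGlobal);
  // ImmField is legal as the encoded offset; RemainderOffset still has to be
  // materialized into the address register.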
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const override
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
static bool usesLGKM_CNT(const MachineInstr &MI)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst when lowering 16-bit SALU instructions to VALU.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
const MachineOperand & getCalleeOperand(const MachineInstr &MI) const override
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by the assembler.
static bool setsSCCifResultIsNonZero(const MachineInstr &MI)
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (PostRASchedulerList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named OperandName, or nullptr if MI has no such operand.
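A minimal usage sketch (assuming TII is a const SIInstrInfo* and MI is a MachineInstr in scope); operands are looked up by name rather than by fixed index:
  if (const MachineOperand *SOffset =
          TII->getNamedOperand(MI, AMDGPU::OpName::soffset))
    if (SOffset->isImm() && SOffset->getImm() == 0) {
      // soffset is present and is a known-zero immediate
    }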
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand if it were operand OpIdx of MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual const MachineOperand & getCalleeOperand(const MachineInstr &MI) const
Returns the callee operand from the given MI.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
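For example (the inline integer range on AMDGPU is -16..64; certain FP bit patterns are also accepted):
  bool A = llvm::AMDGPU::isInlinableLiteral32(64, /*HasInv2Pi=*/true); // true
  bool B = llvm::AMDGPU::isInlinableLiteral32(65, /*HasInv2Pi=*/true); // false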
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU-specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the inline values intended for floating-point operands.
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
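A minimal sketch (MI is an assumed MachineInstr):
  // True if every explicit operand of MI is an immediate.
  bool AllImm = llvm::all_of(MI.explicit_operands(),
                             [](const MachineOperand &MO) { return MO.isImm(); });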
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
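A minimal sketch of the builder pattern (MBB, I, DL, DestReg and TII are assumed to be in scope):
  // Materialize a zero into DestReg before iterator I.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), DestReg)
      .addImm(0);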
constexpr unsigned getKillRegState(bool B)
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
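For example (BrOffset is an assumed int64_t byte offset, as in isBranchOffsetInRange):
  bool Fits = llvm::isInt<16>(BrOffset); // true iff BrOffset fits a signed 16-bit field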
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
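A minimal sketch (MI is an assumed MachineInstr), pairing each operand with its index:
  for (const auto &[Idx, MO] : llvm::enumerate(MI.operands()))
    if (MO.isReg() && MO.getReg() == AMDGPU::EXEC)
      llvm::dbgs() << "operand " << Idx << " is EXEC\n";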
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
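A minimal sketch (MBB is an assumed MachineBasicBlock); the early-increment adapter makes it safe to erase the current instruction inside the loop:
  for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
    if (MI.isDebugInstr())
      MI.eraseFromParent();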
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
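Worked examples (constexpr, from llvm/Support/MathExtras.h):
  static_assert(llvm::alignDown(29u, 8u) == 24u);     // largest multiple of 8 <= 29
  static_assert(llvm::alignDown(29u, 8u, 3u) == 27u); // largest value <= 29 that is 3 mod 8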
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant bit, stopping at the first 1.
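For example, counting active lanes in an exec mask value:
  uint64_t ExecMask = 0x00000000000000F0ULL;
  int ActiveLanes = llvm::popcount(ExecMask);    // 4 bits set
  int FirstActive = llvm::countr_zero(ExecMask); // lowest set bit is bit 4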
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
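Together with Hi_32 above, a 64-bit immediate can be split into two 32-bit halves, e.g.:
  uint64_t Imm = 0x1122334455667788ULL;
  uint32_t Hi = llvm::Hi_32(Imm); // 0x11223344
  uint32_t Lo = llvm::Lo_32(Imm); // 0x55667788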
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
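For example, the number of 32-bit registers needed to hold a value of a given bit width:
  unsigned NumRegs = llvm::divideCeil(96, 32); // 3
  // llvm::divideCeil(97, 32) == 4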
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
constexpr unsigned getUndefRegState(bool B)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
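Together these two helpers decode a signed bitfield; a minimal sketch (decodeSImm21 and Raw are hypothetical names; both functions come from llvm/Support/MathExtras.h):
  int64_t decodeSImm21(uint64_t Raw) {
    uint64_t Field = Raw & llvm::maskTrailingOnes<uint64_t>(21); // keep the low 21 bits
    return llvm::SignExtend64<21>(Field);                        // sign-extend from bit 20
  }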
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.