#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
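// Command-line options: one restricts the assumed branch range (for debugging
// branch relaxation), the other enables fixing up copies between 32-bit and
// 16-bit physical registers.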
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
  return !MI.hasImplicitDef() &&
         MI.getNumImplicitOperands() == MI.getDesc().getNumImplicitUses() &&
         !MI.mayRaiseFPException();
  if (MI.isCompare()) {
    switch (Use.getOpcode()) {
    case AMDGPU::S_AND_SAVEEXEC_B32:
    case AMDGPU::S_AND_SAVEEXEC_B64:

    case AMDGPU::S_AND_B32:
    case AMDGPU::S_AND_B64:
      if (!Use.readsRegister(AMDGPU::EXEC))

  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:
                                            int64_t &Offset1) const {

  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;
    Offset0 = cast<ConstantSDNode>(Load0->getOperand(Offset0Idx))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(Offset1Idx))->getZExtValue();

    assert(NumOps == 4 || NumOps == 5);

    ConstantSDNode *Load0Offset =
        dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
    ConstantSDNode *Load1Offset =
        dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));

    if (!Load0Offset || !Load1Offset)

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))

    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
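// The ST64 variants of ds_read2/ds_write2 below use a stride of 64 elements
// between their two offsets, so the decoded byte offset is scaled by an extra
// factor of 64.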
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,

  OffsetIsScalable = false;
      BaseOps.push_back(BaseOp);
      Offset = OffsetOp->getImm();

      unsigned Offset0 = Offset0Op->getImm();
      unsigned Offset1 = Offset1Op->getImm();
      if (Offset0 + 1 != Offset1)

      BaseOps.push_back(BaseOp);
      Offset = EltSize * Offset0;

    if (DataOpIdx == -1) {

    BaseOps.push_back(RSrc);

    if (BaseOp && !BaseOp->isFI())
      BaseOps.push_back(BaseOp);

    Offset = OffsetImm->getImm();

    if (SOffset->isReg())
      BaseOps.push_back(SOffset);
    else
      Offset += SOffset->getImm();

    BaseOps.push_back(&LdSt.getOperand(SRsrcIdx));

    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    BaseOps.push_back(BaseOp);
    Offset = OffsetOp ? OffsetOp->getImm() : 0;

    BaseOps.push_back(BaseOp);

    BaseOps.push_back(BaseOp);
  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  auto Base1 = MO1->getValue();
  auto Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))

  return Base1 == Base2;

                                      unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
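  // Heuristic: only cluster the memory operations while their combined
  // footprint stays within eight DWORDs.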
  const unsigned LoadSize = NumBytes / NumLoads;
  const unsigned NumDWORDs = ((LoadSize + 3) / 4) * NumLoads;
  return NumDWORDs <= 8;

                                      int64_t Offset0, int64_t Offset1,
                                      unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
                              const char *Msg = "illegal SGPR to VGPR copy") {
  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");

    if (!Def->definesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)

      bool SafeToPropagate = true;
      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      if (ImpUseSuperReg) {

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

         "VGPR used for an intermediate copy should have been reserved.");

  while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) {

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,
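// Copy an SGPR tuple one sub-register at a time, widening to S_MOV_B64 when
// both the source and destination halves are even-aligned.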
  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];

    unsigned Opcode = AMDGPU::S_MOV_B32;

    Register Src = RI.getSubReg(SrcReg, SubIdx);
    bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {

      Opcode = AMDGPU::S_MOV_B64;

    LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx))
                 .addReg(RI.getSubReg(SrcReg, SubIdx))

  assert(FirstMI && LastMI);

    LastMI->addRegisterKilled(SrcReg, &RI);
      ((RI.getRegSizeInBits(*RC) == 16) ^

    MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg;

    if (DestReg == SrcReg) {

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::SReg_64RegClass) {

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

  const unsigned Size = RI.getRegSizeInBits(*RC);

           AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));
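    // Classify which 16-bit halves take part in the copy: only VGPRs have an
    // addressable hi16 half, so AGPR copies, and any 16-bit copy on VI, must
    // stay in lo16.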
    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    bool DstLow = AMDGPU::VGPR_LO16RegClass.contains(DestReg) ||
                  AMDGPU::SReg_LO16RegClass.contains(DestReg) ||
                  AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
                  AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
                  AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg with an AGPR!");

      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg on VI!");

    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;

      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;

      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      Opcode = AMDGPU::INSTRUCTION_LIST_END;

    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&

    Opcode = AMDGPU::V_MOV_B64_e32;

      Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)

  const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {

      SubIdx = SubIndices[Idx];

      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                         RI.getSubReg(SrcReg, SubIdx), UseKill, *RS,
                         ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
      Register DstSubReg = RI.getSubReg(DestReg, SubIdx);
      Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);

        .addReg(RI.getSubReg(SrcReg, SubIdx));
                                     int64_t Value) const {

  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {

  if (RegClass == &AMDGPU::VGPR_32RegClass) {

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;

  if (RI.getRegSizeInBits(*RegClass) > 32) {
      Opcode = AMDGPU::S_MOV_B64;

      Opcode = AMDGPU::S_MOV_B32;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
    int64_t IdxValue = Idx == 0 ? Value : 0;

                get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));

  return &AMDGPU::VGPR_32RegClass;

         "Not a VGPR32 reg");
  if (Cond.size() == 1) {
  } else if (Cond.size() == 2) {
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
                            : AMDGPU::S_CSELECT_B64), SReg)

    case SIInstrInfo::SCC_FALSE: {
                            : AMDGPU::S_CSELECT_B64), SReg)

    case SIInstrInfo::VCCNZ: {

    case SIInstrInfo::VCCZ: {

    case SIInstrInfo::EXECNZ: {
                            : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                            : AMDGPU::S_CSELECT_B64), SReg)

    case SIInstrInfo::EXECZ: {
                            : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                            : AMDGPU::S_CSELECT_B64), SReg)

    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 32) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;

  return AMDGPU::COPY;
                                                       bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                             bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_A32_SAVE;
    return AMDGPU::SI_SPILL_A64_SAVE;
    return AMDGPU::SI_SPILL_A96_SAVE;
    return AMDGPU::SI_SPILL_A128_SAVE;
    return AMDGPU::SI_SPILL_A160_SAVE;
    return AMDGPU::SI_SPILL_A192_SAVE;
    return AMDGPU::SI_SPILL_A224_SAVE;
    return AMDGPU::SI_SPILL_A256_SAVE;
    return AMDGPU::SI_SPILL_A512_SAVE;
    return AMDGPU::SI_SPILL_A1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;

  assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
         SrcReg != AMDGPU::EXEC && "exec should not be spilled");

  if (SrcReg.isVirtual() && SpillSize == 4) {
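// The restore pseudos mirror the save pseudos above, again keyed on spill
// size and register bank.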
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_A32_RESTORE;
    return AMDGPU::SI_SPILL_A64_RESTORE;
    return AMDGPU::SI_SPILL_A96_RESTORE;
    return AMDGPU::SI_SPILL_A128_RESTORE;
    return AMDGPU::SI_SPILL_A160_RESTORE;
    return AMDGPU::SI_SPILL_A192_RESTORE;
    return AMDGPU::SI_SPILL_A224_RESTORE;
    return AMDGPU::SI_SPILL_A256_RESTORE;
    return AMDGPU::SI_SPILL_A512_RESTORE;
    return AMDGPU::SI_SPILL_A1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;

  assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
         DestReg != AMDGPU::EXEC && "exec should not be spilled");

  if (DestReg.isVirtual() && SpillSize == 4) {

                             unsigned Quantity) const {

  while (Quantity > 0) {

  if (HasNoTerminator) {
    if (Info->returnsVoid()) {

  switch (MI.getOpcode()) {

    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
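// The *_term terminator pseudos below are lowered to their plain SALU
// counterparts once control flow has been finalized.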
  switch (MI.getOpcode()) {

  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));

  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));

  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));

  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

          .addImm(Lo.getSExtValue())

          .addImm(Lo.getSExtValue())

          .addImm(Lo.getSExtValue())

          .addImm(Hi.getSExtValue())

    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {

      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

        .addImm(Lo.getSExtValue())
        .addImm(Hi.getSExtValue())

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;

        .add(MI.getOperand(1));

        .add(MI.getOperand(2));

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B64: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;

                  MI.getOperand(0).getReg())
        .add(MI.getOperand(1));

                  MI.getOperand(0).getReg())
        .add(MI.getOperand(2));

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {

      Opc = AMDGPU::V_MOVRELD_B32_e32;

      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

            .add(MI.getOperand(2))

    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
                   .add(MI.getOperand(1)));

      MIB.add(MI.getOperand(2));

    MI.eraseFromParent();

  case AMDGPU::ENTER_STRICT_WWM: {
                              : AMDGPU::S_OR_SAVEEXEC_B64));

  case AMDGPU::ENTER_STRICT_WQM: {
    const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
    const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;

    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));

  case AMDGPU::SI_RETURN: {

    MI.eraseFromParent();
std::pair<MachineInstr*, MachineInstr*>

  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::make_pair(&MI, nullptr);

  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {

    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));

    for (unsigned I = 1; I <= 2; ++I) {

      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());

        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

    for (unsigned I = 3; I < MI.getNumExplicitOperands(); ++I)
      MovDPP.addImm(MI.getOperand(I).getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::make_pair(Split[0], Split[1]);
                                             unsigned Src0OpName,
                                             unsigned Src1OpName) const {

         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())

  else if (NonRegOp.isFI())

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();

  if (CommutedOpcode == -1)

             static_cast<int>(Src0Idx) &&
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                      Src1, AMDGPU::OpName::src1_modifiers);

                                           unsigned &SrcOpIdx0,
                                           unsigned &SrcOpIdx1) const {

                                           unsigned &SrcOpIdx1) const {

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);

                                     int64_t BrOffset) const {

  assert(BranchOp != AMDGPU::S_SETPC_B64);

  if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {

  return MI.getOperand(0).getMBB();
  assert(RS && "RegScavenger required for long branching");
         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);

      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)

  TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
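// Translate between BranchPredicate values and conditional-branch opcodes;
// getBranchPredicate is the inverse of getBranchOpcode.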
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:
                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
    CondBB = I->getOperand(1).getMBB();
    Cond.push_back(I->getOperand(0));

    BranchPredicate Pred = getBranchPredicate(I->getOpcode());
    if (Pred == INVALID_BR)

    CondBB = I->getOperand(0).getMBB();

    Cond.push_back(I->getOperand(1));

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:

    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {

  unsigned RemovedSize = 0;

    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;

                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {

  if (Cond.size() == 1 && Cond[0].isReg()) {

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (Cond.size() != 2) {
                                    Register FalseReg, int &CondCycles,
                                    int &TrueCycles, int &FalseCycles) const {
  switch (Cond[0].getImm()) {

    CondCycles = TrueCycles = FalseCycles = NumInsts;

    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {

    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {
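  // Sub-register index tables used when a wide select must be expanded into
  // 32-bit or 64-bit pieces.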
  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;

      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Regs.push_back(DstElt);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);

          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:

  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::omod};

  unsigned Opc = MI.getOpcode();

  switch (DefMI.getOpcode()) {

  case AMDGPU::S_MOV_B64:

  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:

  if (!ImmOp->isImm())

  unsigned Opc = UseMI.getOpcode();
  if (Opc == AMDGPU::COPY) {

    unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;

    if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16)

      NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      UseMI.getOperand(0).setSubReg(0);

      UseMI.getOperand(0).setReg(DstReg);

    UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
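  // Try to fold the immediate into a MAD/FMA user by switching to the
  // MADMK/MADAK (or FMAMK/FMAAK) forms, which encode one source as a literal
  // constant.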
  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64) {

    bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
    bool IsFMA = Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64;

          IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32 : AMDGPU::V_FMAMK_F16)
                : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);

      unsigned Src1SubReg = Src1->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 ||
          Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      DefMI.eraseFromParent();

      bool Src0Inlined = false;
      if (Src0->isReg()) {

        if (Def && Def->isMoveImmediate() &&

      if (Src1->isReg() && !Src0Inlined) {

        if (Def && Def->isMoveImmediate() &&
            commuteInstruction(UseMI)) {

          IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32 : AMDGPU::V_FMAAK_F16)
                : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);

      if (Opc == AMDGPU::V_MAC_F32_e64 ||
          Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      DefMI.eraseFromParent();

  if (BaseOps1.size() != BaseOps2.size())

  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))
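// Two constant-offset accesses are disjoint when the lower access ends at or
// before the higher one begins.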
                               int WidthB, int OffsetB) {
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,

  int64_t Offset0, Offset1;
  unsigned Dummy0, Dummy1;
  bool Offset0IsScalable, Offset1IsScalable;

  unsigned Width0 = MIa.memoperands().front()->getSize();
  unsigned Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

  if (Reg.isPhysical())

    Imm = Def->getOperand(1).getImm();
  unsigned NumOps = MI.getNumOperands();
  for (unsigned I = 1; I < NumOps; ++I) {
    if (Op.isReg() && Op.isKill())

  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
  bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64;
  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
                                             AMDGPU::OpName::src0);

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&

  const auto killDef = [&]() -> void {

        IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
              : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);

    unsigned NewOpc = IsFMA
                          ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
                          : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);

  unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
                                  : IsF64 ? AMDGPU::V_FMA_F64_e64
                                          : IsLegacy
                                                ? AMDGPU::V_FMA_LEGACY_F32_e64
                                                : AMDGPU::V_FMA_F32_e64
                          : IsF16 ? AMDGPU::V_MAD_F16_e64
                                  : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
                                             : AMDGPU::V_MAD_F32_e64;

          .addImm(Clamp ? Clamp->getImm() : 0)
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||

         Opcode == AMDGPU::DS_GWS_INIT ||
         Opcode == AMDGPU::DS_GWS_SEMA_V ||
         Opcode == AMDGPU::DS_GWS_SEMA_BR ||
         Opcode == AMDGPU::DS_GWS_SEMA_P ||
         Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
         Opcode == AMDGPU::DS_GWS_BARRIER;

  if (const MCPhysReg *ImpDef = MI.getDesc().getImplicitDefs()) {
    for (; ImpDef && *ImpDef; ++ImpDef) {
      if (*ImpDef == AMDGPU::MODE)

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||

      Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
  switch (Imm.getBitWidth()) {

    int32_t Trunc = static_cast<int32_t>(Imm);

    int16_t Trunc = static_cast<int16_t>(Imm);

                                    AMDGPU::OpName::src2))

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

                                AMDGPU::OpName::src0_modifiers) != -1;

  return Mods && Mods->getImm();

  switch (MI.getOpcode()) {
  default: return false;

  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:

  case AMDGPU::V_CNDMASK_B32_e64:

        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
                                           unsigned Op32) const {

  Inst32.add(MI.getOperand(0));

    assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) ||
            (MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) &&

  if (Op32Src2Idx != -1) {

  if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64)

         MO.getReg() == AMDGPU::VCC ||
         MO.getReg() == AMDGPU::VCC_LO;

  return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
         AMDGPU::SReg_64RegClass.contains(MO.getReg());

    switch (MO.getReg()) {
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::FLAT_SCR:

  return AMDGPU::NoRegister;
  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_WRITELANE_B32:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))

    if (Src0Idx == -1) {

    ErrInfo = "Instruction has wrong number of operands.";

  if (MI.isInlineAsm()) {

        ErrInfo = "inlineasm operand has incorrect register class.";

  if (isMIMG(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from MIMG instruction.";

      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";

      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";

        ErrInfo = "Illegal immediate value for operand.";

      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";

          ErrInfo = "Subtarget requires even aligned vector registers";

      if (RegClass != -1) {
        if (Reg.isVirtual())

          ErrInfo = "Operand has incorrect register class.";

      ErrInfo = "SDWA is not supported on this target";

    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {

          ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";

          ErrInfo =
              "Only reg allowed as operands in SDWA instructions on GFX9+";

      if (OMod != nullptr &&
        ErrInfo = "OMod not allowed in SDWA instructions on VI";

    if (isVOPC(BasicOpcode)) {

        if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
          ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";

        if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
          ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";

        if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
          ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";

      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";

          MI.getOperand(MI.findTiedOperandIdx(DstIdx));

            "Dst register should be tied to implicit use of preserved register";

          Dst.getReg() != TiedMO.getReg()) {
        ErrInfo = "Dst register should use same physical register as preserved";

  if (isMIMG(MI.getOpcode()) && !MI.mayStore()) {

      uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
      if (RegCount > DstSize) {
        ErrInfo = "MIMG instruction returns too many registers for dst "
    unsigned ConstantBusCount = 0;
    bool UsesLiteral = false;

      LiteralVal = &MI.getOperand(ImmIdx);

    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {

          if (llvm::all_of(SGPRsUsed, [SGPRUsed](unsigned SGPR) {
                return SGPRUsed != SGPR;

            SGPRsUsed.push_back(SGPRUsed);

            ErrInfo = "VOP2/VOP3 instruction uses more than one literal";

    if (SGPRUsed != AMDGPU::NoRegister) {

      if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
            return !RI.regsOverlap(SGPRUsed, SGPR);

        SGPRsUsed.push_back(SGPRUsed);

        Opcode != AMDGPU::V_WRITELANE_B32) {
      ErrInfo = "VOP* instruction violates constant bus restriction";

      ErrInfo = "VOP3 instruction uses literal";

  if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
    unsigned SGPRCount = 0;
    Register SGPRUsed = AMDGPU::NoRegister;

    for (int OpIdx : {Src0Idx, Src1Idx}) {

        if (MO.getReg() != SGPRUsed)

          ErrInfo = "WRITELANE instruction violates constant bus restriction";

  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {

      ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";

        ErrInfo = "ABS not allowed in VOP3B instructions";

      ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";

      ErrInfo = "invalid branch target for SOPK instruction";

        ErrInfo = "invalid immediate for SOPK instruction";

        ErrInfo = "invalid immediate for SOPK instruction";
  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;

    const unsigned NumImplicitOps = IsDst ? 2 : 1;

    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";

      if (!Dst->isUse()) {
        ErrInfo = "v_movreld_b32 vdst should be a use operand";

      if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
          UseOpIdx != StaticNumOps + 1) {
        ErrInfo = "movrel implicit operands should be tied";

        = MI.getOperand(StaticNumOps + NumImplicitOps - 1);

        !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
      ErrInfo = "src0 should be subreg of implicit vector use";

    if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      ErrInfo = "VALU instruction does not implicitly read exec mask";

  if (MI.mayStore() &&

        ErrInfo = "scalar stores must use m0 as offset register";

    if (Offset->getImm() != 0) {
      ErrInfo = "subtarget does not support offsets in flat instructions";

                                                  AMDGPU::OpName::vaddr0);

        ErrInfo = "dim is out of range";

      IsA16 = R128A16->getImm() != 0;

      IsA16 = A16->getImm() != 0;

    bool IsNSA = SRsrcIdx - VAddr0Idx > 1;

    unsigned AddrWords =

    unsigned VAddrWords;

      VAddrWords = SRsrcIdx - VAddr0Idx;

    if (VAddrWords != AddrWords) {
                        << " but got " << VAddrWords << "\n");
      ErrInfo = "bad vaddr size";
      ErrInfo = "Invalid dpp_ctrl value";

      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";

      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";

        !ST.hasGFX90AInsts()) {
      ErrInfo = "Invalid dpp_ctrl value: "
                "row_newbroadcast/row_share is not supported before "

      ErrInfo = "Invalid dpp_ctrl value: "
                "row_share and row_xmask are not supported before GFX10";

    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
           Desc.OpInfo[DstIdx].RegClass == AMDGPU::VReg_64_Align2RegClassID)) ||
                                     AMDGPU::VReg_64_Align2RegClassID)))) &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "64 bit dpp only support row_newbcast";

    uint16_t DataNameIdx = isDS(Opcode) ? AMDGPU::OpName::data0
                                        : AMDGPU::OpName::vdata;

        ErrInfo = "Invalid register class: "
                  "vdata and vdst should be both VGPR or AGPR";

    if (Data && Data2 &&
      ErrInfo = "Invalid register class: "
                "both data operands should be VGPR or AGPR";

    if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
      ErrInfo = "Invalid register class: "
                "agpr loads and stores not supported on this GPU";

    const auto isAlignedReg = [&MI, &MRI, this](unsigned OpName) -> bool {
      if (Reg.isPhysical())

    if (MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
        MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
        MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) {

      if (!isAlignedReg(AMDGPU::OpName::data0)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for DS_GWS instructions";

      if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for vaddr operand of image instructions";

  if (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&

    if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
      ErrInfo = "Invalid register class: "
                "v_accvgpr_write with an SGPR is not supported on this GPU";

  if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
      ErrInfo = "pseudo expects only physical SGPRs";
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;

  case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
  case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
  case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {

    return MI.getOperand(1).isReg() ||
               AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;

  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;

  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64;
  case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64;
  case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64;
  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64;
  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64;
  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64;
  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64;
  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64;
  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64;
  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64;
  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;

      "Unexpected scalar opcode without corresponding vector one!");
                                          bool IsAllocatable) {

  case AMDGPU::AV_32RegClassID:
    RCID = AMDGPU::VGPR_32RegClassID;
  case AMDGPU::AV_64RegClassID:
    RCID = AMDGPU::VReg_64RegClassID;
  case AMDGPU::AV_96RegClassID:
    RCID = AMDGPU::VReg_96RegClassID;
  case AMDGPU::AV_128RegClassID:
    RCID = AMDGPU::VReg_128RegClassID;
  case AMDGPU::AV_160RegClassID:
    RCID = AMDGPU::VReg_160RegClassID;
  case AMDGPU::AV_512RegClassID:
    RCID = AMDGPU::VReg_512RegClassID;

  bool IsAllocatable = false;

                                          AMDGPU::OpName::vdst);

                                          : AMDGPU::OpName::vdata);
  if (DataIdx != -1) {
    IsAllocatable = VDstIdx != -1 ||