#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"

static cl::opt<bool> EnableSpillSGPRToVGPR(
    "amdgpu-spill-sgpr-to-vgpr",
    cl::desc("Enable spilling SGPRs to VGPRs"),
    cl::ReallyHidden, cl::init(true));

std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;

static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
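// Maps a sub-register width in 32-bit channels to its row index in
// SubRegFromChannelTable (0 marks widths that have no row).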
static void emitUnsupportedError(const Function &Fn, const MachineInstr &MI,
                                 const Twine &ErrMsg) {

                       MI->getOperand(0).isKill(), Index, RS) {}

    if (IsWave32) {
      MovOpc = AMDGPU::S_MOV_B32;
      NotOpc = AMDGPU::S_NOT_B32;
    } else {
      MovOpc = AMDGPU::S_MOV_B64;
      NotOpc = AMDGPU::S_NOT_B64;
    }

    assert(SuperReg != AMDGPU::EXEC && "exec should never spill");
  assert(RS && "Cannot spill SGPR to memory without RegScavenger");

      IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;

                       "unhandled SGPR spill to memory");

    I->getOperand(2).setIsDead();

    I->getOperand(2).setIsDead();

                       "unhandled SGPR spill to memory");
                            ST.getAMDGPUDwarfFlavour(),

  assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
         getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
         (getSubRegIndexLaneMask(AMDGPU::lo16) |
          getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
             getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
         "getNumCoveredRegs() will not work with generated subreg masks!");
  RegPressureIgnoredUnits.resize(getNumRegUnits());

  for (auto Reg : AMDGPU::VGPR_16RegClass) {
    RegPressureIgnoredUnits.set(*regunits(Reg).begin());

  static auto InitializeRegSplitPartsOnce = [this]() {
    for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
      unsigned Size = getSubRegIdxSize(Idx);

      std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
      unsigned Pos = getSubRegIdxOffset(Idx);

      unsigned MaxNumParts = 1024 / Size;
      Vec.resize(MaxNumParts);

  static auto InitializeSubRegFromChannelTableOnce = [this]() {
    for (auto &Row : SubRegFromChannelTable)
      Row.fill(AMDGPU::NoSubRegister);
    for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
      unsigned Width = getSubRegIdxSize(Idx) / 32;
      unsigned Offset = getSubRegIdxOffset(Idx) / 32;

      unsigned TableIdx = Width - 1;
      assert(TableIdx < SubRegFromChannelTable.size());

      SubRegFromChannelTable[TableIdx][Offset] = Idx;

  llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
  llvm::call_once(InitializeSubRegFromChannelTableFlag,
                  InitializeSubRegFromChannelTableOnce);
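// RegSplitParts and SubRegFromChannelTable are static members shared by all
// SIRegisterInfo instances, so they are initialized exactly once.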
               : CSR_AMDGPU_SaveList;

    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
                               : CSR_AMDGPU_SI_Gfx_SaveList;

    return CSR_AMDGPU_CS_ChainPreserve_SaveList;

  static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
  return &NoCalleeSavedReg;

               : CSR_AMDGPU_RegMask;

    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
                               : CSR_AMDGPU_SI_Gfx_RegMask;

  return AMDGPU_AllVGPRs_RegMask;

  return CSR_AMDGPU_NoRegs_RegMask;

  return VGPR >= AMDGPU::VGPR0 && VGPR < AMDGPU::VGPR8;
464 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
465 return &AMDGPU::AV_32RegClass;
466 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
467 return &AMDGPU::AV_64RegClass;
468 if (RC == &AMDGPU::VReg_64_Align2RegClass ||
469 RC == &AMDGPU::AReg_64_Align2RegClass)
470 return &AMDGPU::AV_64_Align2RegClass;
471 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
472 return &AMDGPU::AV_96RegClass;
473 if (RC == &AMDGPU::VReg_96_Align2RegClass ||
474 RC == &AMDGPU::AReg_96_Align2RegClass)
475 return &AMDGPU::AV_96_Align2RegClass;
476 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
477 return &AMDGPU::AV_128RegClass;
478 if (RC == &AMDGPU::VReg_128_Align2RegClass ||
479 RC == &AMDGPU::AReg_128_Align2RegClass)
480 return &AMDGPU::AV_128_Align2RegClass;
481 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
482 return &AMDGPU::AV_160RegClass;
483 if (RC == &AMDGPU::VReg_160_Align2RegClass ||
484 RC == &AMDGPU::AReg_160_Align2RegClass)
485 return &AMDGPU::AV_160_Align2RegClass;
486 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
487 return &AMDGPU::AV_192RegClass;
488 if (RC == &AMDGPU::VReg_192_Align2RegClass ||
489 RC == &AMDGPU::AReg_192_Align2RegClass)
490 return &AMDGPU::AV_192_Align2RegClass;
491 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
492 return &AMDGPU::AV_256RegClass;
493 if (RC == &AMDGPU::VReg_256_Align2RegClass ||
494 RC == &AMDGPU::AReg_256_Align2RegClass)
495 return &AMDGPU::AV_256_Align2RegClass;
496 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
497 return &AMDGPU::AV_512RegClass;
498 if (RC == &AMDGPU::VReg_512_Align2RegClass ||
499 RC == &AMDGPU::AReg_512_Align2RegClass)
500 return &AMDGPU::AV_512_Align2RegClass;
501 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
502 return &AMDGPU::AV_1024RegClass;
503 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
504 RC == &AMDGPU::AReg_1024_Align2RegClass)
505 return &AMDGPU::AV_1024_Align2RegClass;
535 return AMDGPU_AllVGPRs_RegMask;
539 return AMDGPU_AllAGPRs_RegMask;
543 return AMDGPU_AllVectorRegs_RegMask;
547 return AMDGPU_AllAllocatableSRegs_RegMask;
  assert(NumRegIndex && "Not implemented");
  assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
  return SubRegFromChannelTable[NumRegIndex - 1][Channel];
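// Example: getSubRegFromChannel(2, 2), i.e. channel 2 with a width of two
// 32-bit registers, yields the index covering channels 2-3 (sub2_sub3).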
                                                     const unsigned Align,
  MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
std::pair<unsigned, unsigned>
  unsigned MaxNumAGPRs = MaxNumVGPRs;
  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();

    MaxNumAGPRs = MaxNumVGPRs;

    if (MaxNumVGPRs > TotalNumVGPRs) {
      MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
      MaxNumVGPRs = TotalNumVGPRs;

  return std::pair(MaxNumVGPRs, MaxNumAGPRs);
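// When the requested VGPR budget exceeds the number of architected VGPR_32
// registers, the excess is handed out as AGPRs instead.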
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  reserveRegisterTuples(Reserved, AMDGPU::M0);

  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);

  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);

  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);

  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);

  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);

  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);

  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);

  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();

    unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
    if (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs)

  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    reserveRegisterTuples(Reserved, ScratchRSrcReg);

  if (LongBranchReservedReg)
    reserveRegisterTuples(Reserved, LongBranchReservedReg);

    reserveRegisterTuples(Reserved, StackPtrReg);
    assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));

    reserveRegisterTuples(Reserved, FrameReg);
    assert(!isSubRegister(ScratchRSrcReg, FrameReg));

    reserveRegisterTuples(Reserved, BasePtrReg);
    assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));

    reserveRegisterTuples(Reserved, ExecCopyReg);

    unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
    if (Index + NumRegs > MaxNumVGPRs)

    unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
    if (Index + NumRegs > MaxNumAGPRs)

  if (!NonWWMRegMask.empty()) {
    for (unsigned RegI = AMDGPU::VGPR0, RegE = AMDGPU::VGPR0 + MaxNumVGPRs;
         RegI < RegE; ++RegI) {
      if (NonWWMRegMask.test(RegI))
        reserveRegisterTuples(Reserved, RegI);

    reserveRegisterTuples(Reserved, Reg);

    reserveRegisterTuples(Reserved, Reg);

    reserveRegisterTuples(Reserved, Reg);
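// In all of the reservations above, reserveRegisterTuples marks the given
// register together with every register that aliases it, so whole tuples
// such as TTMP pairs and the 128-bit scratch RSRC are blocked at once.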
  if (Info->isBottomOfStack())

  if (Info->isEntryFunction()) {
                                           AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();

  switch (MI->getOpcode()) {
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e32: {
    int OtherIdx = Idx == 1 ? 2 : 1;

  case AMDGPU::V_ADD_CO_U32_e64: {
    int OtherIdx = Idx == 2 ? 3 : 2;

                                     AMDGPU::OpName::vaddr) ||
                                     AMDGPU::OpName::saddr))) &&
         "Should never see frame index on non-address operand");
    return Src1.isImm() || (Src1.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),
                                                       Src1.getReg()));

    return Src0.isImm() || (Src0.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),
                                                       Src0.getReg()));

  switch (MI->getOpcode()) {
  case AMDGPU::V_ADD_U32_e32: {

  case AMDGPU::V_ADD_U32_e64:

  case AMDGPU::V_ADD_CO_U32_e32:
    return MI->getOperand(3).isDead();
  case AMDGPU::V_ADD_CO_U32_e64:
    return MI->getOperand(1).isDead();

  return !TII->isLegalMUBUFImmOffset(FullOffset);
    DL = Ins->getDebugLoc();

                            : AMDGPU::V_MOV_B32_e32;

                            : &AMDGPU::VGPR_32RegClass);

    Register OffsetReg =
        MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

                            : &AMDGPU::VGPR_32RegClass);

  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)

  switch (MI.getOpcode()) {
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e32: {

    if (!ImmOp->isImm()) {

      TII->legalizeOperandsVOP2(MI.getMF()->getRegInfo(), MI);

    if (TotalOffset == 0) {
      MI.setDesc(TII->get(AMDGPU::COPY));
      for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)
        MI.removeOperand(I);

      MI.getOperand(1).ChangeToRegister(BaseReg, false);

    ImmOp->setImm(TotalOffset);

        MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      MI.getOperand(2).ChangeToRegister(BaseRegVGPR, false);

      MI.getOperand(2).ChangeToRegister(BaseReg, false);
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e64: {
    int Src0Idx = MI.getNumExplicitDefs();

    if (!ImmOp->isImm()) {

      TII->legalizeOperandsVOP3(MI.getMF()->getRegInfo(), MI);

    if (TotalOffset == 0) {
      MI.setDesc(TII->get(AMDGPU::COPY));

      for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)
        MI.removeOperand(I);

      MI.getOperand(1).ChangeToRegister(BaseReg, false);

    ImmOp->setImm(TotalOffset);

  bool IsFlat = TII->isFLATScratch(MI);

  bool SeenFI = false;

      TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
                                      : AMDGPU::OpName::vaddr);

  assert(FIOp && FIOp->isFI() && "frame index must be address operand");

           "offset should be legal");

    OffsetOp->setImm(NewOffset);

  assert(TII->isLegalMUBUFImmOffset(NewOffset) && "offset should be legal");

  OffsetOp->setImm(NewOffset);
  switch (MI->getOpcode()) {
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e32:

  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e64:

  return TII->isLegalMUBUFImmOffset(NewOffset);

  return &AMDGPU::VGPR_32RegClass;

  if (RC == &AMDGPU::SCC_CLASSRegClass)
1155 case AMDGPU::SI_SPILL_S1024_SAVE:
1156 case AMDGPU::SI_SPILL_S1024_RESTORE:
1157 case AMDGPU::SI_SPILL_V1024_SAVE:
1158 case AMDGPU::SI_SPILL_V1024_RESTORE:
1159 case AMDGPU::SI_SPILL_A1024_SAVE:
1160 case AMDGPU::SI_SPILL_A1024_RESTORE:
1161 case AMDGPU::SI_SPILL_AV1024_SAVE:
1162 case AMDGPU::SI_SPILL_AV1024_RESTORE:
1164 case AMDGPU::SI_SPILL_S512_SAVE:
1165 case AMDGPU::SI_SPILL_S512_RESTORE:
1166 case AMDGPU::SI_SPILL_V512_SAVE:
1167 case AMDGPU::SI_SPILL_V512_RESTORE:
1168 case AMDGPU::SI_SPILL_A512_SAVE:
1169 case AMDGPU::SI_SPILL_A512_RESTORE:
1170 case AMDGPU::SI_SPILL_AV512_SAVE:
1171 case AMDGPU::SI_SPILL_AV512_RESTORE:
1173 case AMDGPU::SI_SPILL_S384_SAVE:
1174 case AMDGPU::SI_SPILL_S384_RESTORE:
1175 case AMDGPU::SI_SPILL_V384_SAVE:
1176 case AMDGPU::SI_SPILL_V384_RESTORE:
1177 case AMDGPU::SI_SPILL_A384_SAVE:
1178 case AMDGPU::SI_SPILL_A384_RESTORE:
1179 case AMDGPU::SI_SPILL_AV384_SAVE:
1180 case AMDGPU::SI_SPILL_AV384_RESTORE:
1182 case AMDGPU::SI_SPILL_S352_SAVE:
1183 case AMDGPU::SI_SPILL_S352_RESTORE:
1184 case AMDGPU::SI_SPILL_V352_SAVE:
1185 case AMDGPU::SI_SPILL_V352_RESTORE:
1186 case AMDGPU::SI_SPILL_A352_SAVE:
1187 case AMDGPU::SI_SPILL_A352_RESTORE:
1188 case AMDGPU::SI_SPILL_AV352_SAVE:
1189 case AMDGPU::SI_SPILL_AV352_RESTORE:
1191 case AMDGPU::SI_SPILL_S320_SAVE:
1192 case AMDGPU::SI_SPILL_S320_RESTORE:
1193 case AMDGPU::SI_SPILL_V320_SAVE:
1194 case AMDGPU::SI_SPILL_V320_RESTORE:
1195 case AMDGPU::SI_SPILL_A320_SAVE:
1196 case AMDGPU::SI_SPILL_A320_RESTORE:
1197 case AMDGPU::SI_SPILL_AV320_SAVE:
1198 case AMDGPU::SI_SPILL_AV320_RESTORE:
1200 case AMDGPU::SI_SPILL_S288_SAVE:
1201 case AMDGPU::SI_SPILL_S288_RESTORE:
1202 case AMDGPU::SI_SPILL_V288_SAVE:
1203 case AMDGPU::SI_SPILL_V288_RESTORE:
1204 case AMDGPU::SI_SPILL_A288_SAVE:
1205 case AMDGPU::SI_SPILL_A288_RESTORE:
1206 case AMDGPU::SI_SPILL_AV288_SAVE:
1207 case AMDGPU::SI_SPILL_AV288_RESTORE:
1209 case AMDGPU::SI_SPILL_S256_SAVE:
1210 case AMDGPU::SI_SPILL_S256_RESTORE:
1211 case AMDGPU::SI_SPILL_V256_SAVE:
1212 case AMDGPU::SI_SPILL_V256_RESTORE:
1213 case AMDGPU::SI_SPILL_A256_SAVE:
1214 case AMDGPU::SI_SPILL_A256_RESTORE:
1215 case AMDGPU::SI_SPILL_AV256_SAVE:
1216 case AMDGPU::SI_SPILL_AV256_RESTORE:
1218 case AMDGPU::SI_SPILL_S224_SAVE:
1219 case AMDGPU::SI_SPILL_S224_RESTORE:
1220 case AMDGPU::SI_SPILL_V224_SAVE:
1221 case AMDGPU::SI_SPILL_V224_RESTORE:
1222 case AMDGPU::SI_SPILL_A224_SAVE:
1223 case AMDGPU::SI_SPILL_A224_RESTORE:
1224 case AMDGPU::SI_SPILL_AV224_SAVE:
1225 case AMDGPU::SI_SPILL_AV224_RESTORE:
1227 case AMDGPU::SI_SPILL_S192_SAVE:
1228 case AMDGPU::SI_SPILL_S192_RESTORE:
1229 case AMDGPU::SI_SPILL_V192_SAVE:
1230 case AMDGPU::SI_SPILL_V192_RESTORE:
1231 case AMDGPU::SI_SPILL_A192_SAVE:
1232 case AMDGPU::SI_SPILL_A192_RESTORE:
1233 case AMDGPU::SI_SPILL_AV192_SAVE:
1234 case AMDGPU::SI_SPILL_AV192_RESTORE:
1236 case AMDGPU::SI_SPILL_S160_SAVE:
1237 case AMDGPU::SI_SPILL_S160_RESTORE:
1238 case AMDGPU::SI_SPILL_V160_SAVE:
1239 case AMDGPU::SI_SPILL_V160_RESTORE:
1240 case AMDGPU::SI_SPILL_A160_SAVE:
1241 case AMDGPU::SI_SPILL_A160_RESTORE:
1242 case AMDGPU::SI_SPILL_AV160_SAVE:
1243 case AMDGPU::SI_SPILL_AV160_RESTORE:
1245 case AMDGPU::SI_SPILL_S128_SAVE:
1246 case AMDGPU::SI_SPILL_S128_RESTORE:
1247 case AMDGPU::SI_SPILL_V128_SAVE:
1248 case AMDGPU::SI_SPILL_V128_RESTORE:
1249 case AMDGPU::SI_SPILL_A128_SAVE:
1250 case AMDGPU::SI_SPILL_A128_RESTORE:
1251 case AMDGPU::SI_SPILL_AV128_SAVE:
1252 case AMDGPU::SI_SPILL_AV128_RESTORE:
1254 case AMDGPU::SI_SPILL_S96_SAVE:
1255 case AMDGPU::SI_SPILL_S96_RESTORE:
1256 case AMDGPU::SI_SPILL_V96_SAVE:
1257 case AMDGPU::SI_SPILL_V96_RESTORE:
1258 case AMDGPU::SI_SPILL_A96_SAVE:
1259 case AMDGPU::SI_SPILL_A96_RESTORE:
1260 case AMDGPU::SI_SPILL_AV96_SAVE:
1261 case AMDGPU::SI_SPILL_AV96_RESTORE:
1263 case AMDGPU::SI_SPILL_S64_SAVE:
1264 case AMDGPU::SI_SPILL_S64_RESTORE:
1265 case AMDGPU::SI_SPILL_V64_SAVE:
1266 case AMDGPU::SI_SPILL_V64_RESTORE:
1267 case AMDGPU::SI_SPILL_A64_SAVE:
1268 case AMDGPU::SI_SPILL_A64_RESTORE:
1269 case AMDGPU::SI_SPILL_AV64_SAVE:
1270 case AMDGPU::SI_SPILL_AV64_RESTORE:
1272 case AMDGPU::SI_SPILL_S32_SAVE:
1273 case AMDGPU::SI_SPILL_S32_RESTORE:
1274 case AMDGPU::SI_SPILL_V32_SAVE:
1275 case AMDGPU::SI_SPILL_V32_RESTORE:
1276 case AMDGPU::SI_SPILL_A32_SAVE:
1277 case AMDGPU::SI_SPILL_A32_RESTORE:
1278 case AMDGPU::SI_SPILL_AV32_SAVE:
1279 case AMDGPU::SI_SPILL_AV32_RESTORE:
1280 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
1281 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
1282 case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
1283 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
1291 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
1292 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1293 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
1294 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
1295 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
1296 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
1297 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
1298 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
1299 case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
1300 return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
1301 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
1302 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
1303 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
1304 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
1305 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
1306 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
1314 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
1315 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1316 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
1317 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
1318 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
1319 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
1320 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
1321 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
1322 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
1323 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
1324 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
1325 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
1326 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
1327 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
1328 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
1329 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
1330 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
1331 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
1332 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
1333 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
1334 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
1335 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
1336 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
1337 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
1338 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
1339 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
1340 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
1341 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
1349 case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
1350 return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
1351 case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
1352 return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
1353 case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
1354 return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
1355 case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
1356 return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
1357 case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
1358 return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
1359 case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
1360 return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
1361 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
1362 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
1363 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
1364 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
1372 case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
1373 return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
1374 case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
1375 return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
1376 case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
1377 return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
1378 case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
1379 return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
1380 case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
1381 return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
1382 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
1383 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
1384 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
1385 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
1386 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
1387 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
1388 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
1389 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
1390 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
1391 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
1392 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
1393 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
1394 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
1395 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
1396 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
1397 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
1398 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
1399 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
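// The four switches above convert MUBUF spill opcodes between their OFFEN
// form (VGPR index plus immediate) and their OFFSET form (immediate only),
// used when a frame index folds to a plain constant offset.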
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
                                           MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MI,
                                           int Index, unsigned Lane,
                                           unsigned ValueReg, bool IsKill) {

  if (Reg == AMDGPU::NoRegister)

  bool IsStore = MI->mayStore();

  unsigned Dst = IsStore ? Reg : ValueReg;
  unsigned Src = IsStore ? ValueReg : Reg;
  bool IsVGPR = TRI->isVGPR(MRI, Reg);

  if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {

  unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
                                    : AMDGPU::V_ACCVGPR_READ_B32_e64;

  bool IsStore = MI->mayStore();

  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
  if (LoadStoreOp == -1)

          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))

                                               AMDGPU::OpName::vdata_in);
    NewMI.add(*VDataIn);

                                        unsigned LoadStoreOp,
  bool IsStore = TII->get(LoadStoreOp).mayStore();

    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;

    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;

    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;

    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
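// Flat-scratch spills are widened to the largest SCRATCH_*_DWORD[X2..X4]_SADDR
// opcode whose width matches the element size being transferred.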
                                        unsigned LoadStoreOp, int Index,
                                        Register ValueReg, bool IsKill,
  assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");

  bool IsStore = Desc->mayStore();
  bool IsFlat = TII->isFLATScratch(LoadStoreOp);

  bool CanClobberSCC = false;
  bool Scavenged = false;

  unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
  unsigned NumSubRegs = RegWidth / EltSize;
  unsigned Size = NumSubRegs * EltSize;
  unsigned RemSize = RegWidth - Size;
  unsigned NumRemSubRegs = RemSize ? 1 : 0;

  int64_t MaterializedOffset = Offset;

  int64_t MaxOffset = Offset + Size + RemSize - EltSize;
  int64_t ScratchOffsetRegDelta = 0;

  if (IsFlat && EltSize > 4) {
    Desc = &TII->get(LoadStoreOp);

         "unexpected VGPR spill offset");

  bool UseVGPROffset = false;

  if (IsFlat && SGPRBase) {

  bool IsOffsetLegal =
        : TII->isLegalMUBUFImmOffset(MaxOffset);

      CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
    } else if (LiveUnits) {
      CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
      for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {

    if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)

      UseVGPROffset = true;

        for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
            TmpOffsetVGPR = Reg;

    } else if (!SOffset && CanClobberSCC) {

      if (!ScratchOffsetReg)
      SOffset = ScratchOffsetReg;
      ScratchOffsetRegDelta = Offset;

  if (!IsFlat && !UseVGPROffset)

  if (!UseVGPROffset && !SOffset)

  if (UseVGPROffset) {
    MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
  } else if (ScratchOffsetReg == AMDGPU::NoRegister) {

        .addReg(ScratchOffsetReg)
    Add->getOperand(3).setIsDead();

  if (IsFlat && SOffset == AMDGPU::NoRegister) {
           && "Unexpected vaddr for flat scratch with a FI operand");

    if (UseVGPROffset) {

    Desc = &TII->get(LoadStoreOp);

  for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
       ++i, RegOffset += EltSize) {
    if (i == NumSubRegs) {

      Desc = &TII->get(LoadStoreOp);

      if (!IsFlat && UseVGPROffset) {
        Desc = &TII->get(NewLoadStoreOp);

      if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
        MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);

    unsigned NumRegs = EltSize / 4;

    unsigned SOffsetRegState = 0;
    const bool IsLastSubReg = i + 1 == e;
    const bool IsFirstSubReg = i == 0;

    bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
    bool NeedSuperRegImpOperand = e > 1;

    unsigned RemEltSize = EltSize;

      for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
               LaneE = RegOffset / 4;
           Lane >= LaneE; --Lane) {
        bool IsSubReg = e > 1 || EltSize > 4;

        if (!MIB.getInstr())
        if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
          NeedSuperRegDef = false;
        if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
          NeedSuperRegImpOperand = true;
          unsigned State = SrcDstRegState;
          if (!IsLastSubReg || (Lane != LaneE))
            State &= ~RegState::Kill;
          if (!IsFirstSubReg || (Lane != LaneS))
            State &= ~RegState::Define;

    if (RemEltSize != EltSize) {
      assert(IsFlat && EltSize > 4);
      unsigned NumRegs = RemEltSize / 4;

    unsigned FinalReg = SubReg;

      if (!TmpIntermediateVGPR) {
                TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpIntermediateVGPR)
        if (NeedSuperRegDef)
        if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
      SubReg = TmpIntermediateVGPR;
    } else if (UseVGPROffset) {
      if (!TmpOffsetVGPR) {

    if (UseVGPROffset) {

    if (SOffset == AMDGPU::NoRegister) {
      if (UseVGPROffset && ScratchOffsetReg) {
        MIB.addReg(ScratchOffsetReg);

      MIB.addReg(SOffset, SOffsetRegState);

    MIB.addImm(Offset + RegOffset);

    MIB.addMemOperand(NewMMO);

    if (!IsAGPR && NeedSuperRegDef)

    if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {

    if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))

  if (!IsStore && MI != MBB.end() && MI->isReturn() &&
    MIB->tieOperands(0, MIB->getNumOperands() - 1);

  if (ScratchOffsetRegDelta != 0) {
        .addImm(-ScratchOffsetRegDelta);
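// buildSpillLoadStore breaks a wide VGPR/AGPR transfer into EltSize-sized
// pieces and, when the immediate offset is not encodable, materializes the
// offset in a scavenged SGPR or VGPR (ScratchOffsetRegDelta undoes an
// in-place adjustment of the scratch offset register afterwards).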
                                    bool IsKill) const {

                          : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

                          : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

                                 bool SpillToPhysVGPRLane) const {
  assert(!MI->getOperand(0).isUndef() &&
         "undef spill should have been deleted earlier");

  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)

           "Num of SGPRs spilled should be less than or equal to num of "

    for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {

      bool IsFirstSubreg = i == 0;

      bool UseKill = SB.IsKill && IsLastSubreg;

                  SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)

      if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))

      for (unsigned i = Offset * PVD.PerVGPR,

                    SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)

        unsigned SuperKillState = 0;

    MI->eraseFromParent();

                                   bool SpillToPhysVGPRLane) const {

  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)

    for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {

                  SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)

      for (unsigned i = Offset * PVD.PerVGPR,

        bool LastSubReg = (i + 1 == e);

                      SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)

    MI->eraseFromParent();

    for (unsigned i = Offset * PVD.PerVGPR,

      unsigned SuperKillState = 0;

  MI = RestoreMBB.end();

    for (unsigned i = Offset * PVD.PerVGPR,

      bool LastSubReg = (i + 1 == e);
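// SGPR spills store each 32-bit sub-register into a VGPR lane via
// SI_SPILL_S32_TO_VGPR and reload it with SI_RESTORE_S32_FROM_VGPR; spilling
// through memory is only used when no VGPR lane is available.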
  switch (MI->getOpcode()) {
2242 case AMDGPU::SI_SPILL_S1024_SAVE:
2243 case AMDGPU::SI_SPILL_S512_SAVE:
2244 case AMDGPU::SI_SPILL_S384_SAVE:
2245 case AMDGPU::SI_SPILL_S352_SAVE:
2246 case AMDGPU::SI_SPILL_S320_SAVE:
2247 case AMDGPU::SI_SPILL_S288_SAVE:
2248 case AMDGPU::SI_SPILL_S256_SAVE:
2249 case AMDGPU::SI_SPILL_S224_SAVE:
2250 case AMDGPU::SI_SPILL_S192_SAVE:
2251 case AMDGPU::SI_SPILL_S160_SAVE:
2252 case AMDGPU::SI_SPILL_S128_SAVE:
2253 case AMDGPU::SI_SPILL_S96_SAVE:
2254 case AMDGPU::SI_SPILL_S64_SAVE:
2255 case AMDGPU::SI_SPILL_S32_SAVE:
    return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
2257 case AMDGPU::SI_SPILL_S1024_RESTORE:
2258 case AMDGPU::SI_SPILL_S512_RESTORE:
2259 case AMDGPU::SI_SPILL_S384_RESTORE:
2260 case AMDGPU::SI_SPILL_S352_RESTORE:
2261 case AMDGPU::SI_SPILL_S320_RESTORE:
2262 case AMDGPU::SI_SPILL_S288_RESTORE:
2263 case AMDGPU::SI_SPILL_S256_RESTORE:
2264 case AMDGPU::SI_SPILL_S224_RESTORE:
2265 case AMDGPU::SI_SPILL_S192_RESTORE:
2266 case AMDGPU::SI_SPILL_S160_RESTORE:
2267 case AMDGPU::SI_SPILL_S128_RESTORE:
2268 case AMDGPU::SI_SPILL_S96_RESTORE:
2269 case AMDGPU::SI_SPILL_S64_RESTORE:
2270 case AMDGPU::SI_SPILL_S32_RESTORE:
    return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
                                        int SPAdj, unsigned FIOperandNum,
  assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
         "unreserved scratch RSRC register");

  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
2301 case AMDGPU::SI_SPILL_S1024_SAVE:
2302 case AMDGPU::SI_SPILL_S512_SAVE:
2303 case AMDGPU::SI_SPILL_S384_SAVE:
2304 case AMDGPU::SI_SPILL_S352_SAVE:
2305 case AMDGPU::SI_SPILL_S320_SAVE:
2306 case AMDGPU::SI_SPILL_S288_SAVE:
2307 case AMDGPU::SI_SPILL_S256_SAVE:
2308 case AMDGPU::SI_SPILL_S224_SAVE:
2309 case AMDGPU::SI_SPILL_S192_SAVE:
2310 case AMDGPU::SI_SPILL_S160_SAVE:
2311 case AMDGPU::SI_SPILL_S128_SAVE:
2312 case AMDGPU::SI_SPILL_S96_SAVE:
2313 case AMDGPU::SI_SPILL_S64_SAVE:
2314 case AMDGPU::SI_SPILL_S32_SAVE: {
2319 case AMDGPU::SI_SPILL_S1024_RESTORE:
2320 case AMDGPU::SI_SPILL_S512_RESTORE:
2321 case AMDGPU::SI_SPILL_S384_RESTORE:
2322 case AMDGPU::SI_SPILL_S352_RESTORE:
2323 case AMDGPU::SI_SPILL_S320_RESTORE:
2324 case AMDGPU::SI_SPILL_S288_RESTORE:
2325 case AMDGPU::SI_SPILL_S256_RESTORE:
2326 case AMDGPU::SI_SPILL_S224_RESTORE:
2327 case AMDGPU::SI_SPILL_S192_RESTORE:
2328 case AMDGPU::SI_SPILL_S160_RESTORE:
2329 case AMDGPU::SI_SPILL_S128_RESTORE:
2330 case AMDGPU::SI_SPILL_S96_RESTORE:
2331 case AMDGPU::SI_SPILL_S64_RESTORE:
2332 case AMDGPU::SI_SPILL_S32_RESTORE: {
2337 case AMDGPU::SI_SPILL_V1024_SAVE:
2338 case AMDGPU::SI_SPILL_V512_SAVE:
2339 case AMDGPU::SI_SPILL_V384_SAVE:
2340 case AMDGPU::SI_SPILL_V352_SAVE:
2341 case AMDGPU::SI_SPILL_V320_SAVE:
2342 case AMDGPU::SI_SPILL_V288_SAVE:
2343 case AMDGPU::SI_SPILL_V256_SAVE:
2344 case AMDGPU::SI_SPILL_V224_SAVE:
2345 case AMDGPU::SI_SPILL_V192_SAVE:
2346 case AMDGPU::SI_SPILL_V160_SAVE:
2347 case AMDGPU::SI_SPILL_V128_SAVE:
2348 case AMDGPU::SI_SPILL_V96_SAVE:
2349 case AMDGPU::SI_SPILL_V64_SAVE:
2350 case AMDGPU::SI_SPILL_V32_SAVE:
2351 case AMDGPU::SI_SPILL_A1024_SAVE:
2352 case AMDGPU::SI_SPILL_A512_SAVE:
2353 case AMDGPU::SI_SPILL_A384_SAVE:
2354 case AMDGPU::SI_SPILL_A352_SAVE:
2355 case AMDGPU::SI_SPILL_A320_SAVE:
2356 case AMDGPU::SI_SPILL_A288_SAVE:
2357 case AMDGPU::SI_SPILL_A256_SAVE:
2358 case AMDGPU::SI_SPILL_A224_SAVE:
2359 case AMDGPU::SI_SPILL_A192_SAVE:
2360 case AMDGPU::SI_SPILL_A160_SAVE:
2361 case AMDGPU::SI_SPILL_A128_SAVE:
2362 case AMDGPU::SI_SPILL_A96_SAVE:
2363 case AMDGPU::SI_SPILL_A64_SAVE:
2364 case AMDGPU::SI_SPILL_A32_SAVE:
2365 case AMDGPU::SI_SPILL_AV1024_SAVE:
2366 case AMDGPU::SI_SPILL_AV512_SAVE:
2367 case AMDGPU::SI_SPILL_AV384_SAVE:
2368 case AMDGPU::SI_SPILL_AV352_SAVE:
2369 case AMDGPU::SI_SPILL_AV320_SAVE:
2370 case AMDGPU::SI_SPILL_AV288_SAVE:
2371 case AMDGPU::SI_SPILL_AV256_SAVE:
2372 case AMDGPU::SI_SPILL_AV224_SAVE:
2373 case AMDGPU::SI_SPILL_AV192_SAVE:
2374 case AMDGPU::SI_SPILL_AV160_SAVE:
2375 case AMDGPU::SI_SPILL_AV128_SAVE:
2376 case AMDGPU::SI_SPILL_AV96_SAVE:
2377 case AMDGPU::SI_SPILL_AV64_SAVE:
2378 case AMDGPU::SI_SPILL_AV32_SAVE:
2379 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
  case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
                                              AMDGPU::OpName::vdata);
      MI->eraseFromParent();

    assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==

                          : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
    auto *MBB = MI->getParent();
    bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
    if (IsWWMRegSpill) {

        TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
        *MI->memoperands_begin(), RS);

    MI->eraseFromParent();
2410 case AMDGPU::SI_SPILL_V32_RESTORE:
2411 case AMDGPU::SI_SPILL_V64_RESTORE:
2412 case AMDGPU::SI_SPILL_V96_RESTORE:
2413 case AMDGPU::SI_SPILL_V128_RESTORE:
2414 case AMDGPU::SI_SPILL_V160_RESTORE:
2415 case AMDGPU::SI_SPILL_V192_RESTORE:
2416 case AMDGPU::SI_SPILL_V224_RESTORE:
2417 case AMDGPU::SI_SPILL_V256_RESTORE:
2418 case AMDGPU::SI_SPILL_V288_RESTORE:
2419 case AMDGPU::SI_SPILL_V320_RESTORE:
2420 case AMDGPU::SI_SPILL_V352_RESTORE:
2421 case AMDGPU::SI_SPILL_V384_RESTORE:
2422 case AMDGPU::SI_SPILL_V512_RESTORE:
2423 case AMDGPU::SI_SPILL_V1024_RESTORE:
2424 case AMDGPU::SI_SPILL_A32_RESTORE:
2425 case AMDGPU::SI_SPILL_A64_RESTORE:
2426 case AMDGPU::SI_SPILL_A96_RESTORE:
2427 case AMDGPU::SI_SPILL_A128_RESTORE:
2428 case AMDGPU::SI_SPILL_A160_RESTORE:
2429 case AMDGPU::SI_SPILL_A192_RESTORE:
2430 case AMDGPU::SI_SPILL_A224_RESTORE:
2431 case AMDGPU::SI_SPILL_A256_RESTORE:
2432 case AMDGPU::SI_SPILL_A288_RESTORE:
2433 case AMDGPU::SI_SPILL_A320_RESTORE:
2434 case AMDGPU::SI_SPILL_A352_RESTORE:
2435 case AMDGPU::SI_SPILL_A384_RESTORE:
2436 case AMDGPU::SI_SPILL_A512_RESTORE:
2437 case AMDGPU::SI_SPILL_A1024_RESTORE:
2438 case AMDGPU::SI_SPILL_AV32_RESTORE:
2439 case AMDGPU::SI_SPILL_AV64_RESTORE:
2440 case AMDGPU::SI_SPILL_AV96_RESTORE:
2441 case AMDGPU::SI_SPILL_AV128_RESTORE:
2442 case AMDGPU::SI_SPILL_AV160_RESTORE:
2443 case AMDGPU::SI_SPILL_AV192_RESTORE:
2444 case AMDGPU::SI_SPILL_AV224_RESTORE:
2445 case AMDGPU::SI_SPILL_AV256_RESTORE:
2446 case AMDGPU::SI_SPILL_AV288_RESTORE:
2447 case AMDGPU::SI_SPILL_AV320_RESTORE:
2448 case AMDGPU::SI_SPILL_AV352_RESTORE:
2449 case AMDGPU::SI_SPILL_AV384_RESTORE:
2450 case AMDGPU::SI_SPILL_AV512_RESTORE:
2451 case AMDGPU::SI_SPILL_AV1024_RESTORE:
2452 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
  case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
                                              AMDGPU::OpName::vdata);
    assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==

                          : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
    auto *MBB = MI->getParent();
    bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
    if (IsWWMRegSpill) {

        TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
        *MI->memoperands_begin(), RS);

    MI->eraseFromParent();
2479 case AMDGPU::V_ADD_U32_e32:
2480 case AMDGPU::V_ADD_U32_e64:
2481 case AMDGPU::V_ADD_CO_U32_e32:
2482 case AMDGPU::V_ADD_CO_U32_e64: {
2484 unsigned NumDefs =
MI->getNumExplicitDefs();
2485 unsigned Src0Idx = NumDefs;
2487 bool HasClamp =
false;
2490 switch (
MI->getOpcode()) {
2491 case AMDGPU::V_ADD_U32_e32:
2493 case AMDGPU::V_ADD_U32_e64:
2494 HasClamp =
MI->getOperand(3).getImm();
2496 case AMDGPU::V_ADD_CO_U32_e32:
2497 VCCOp = &
MI->getOperand(3);
2499 case AMDGPU::V_ADD_CO_U32_e64:
2500 VCCOp = &
MI->getOperand(1);
2501 HasClamp =
MI->getOperand(4).getImm();
2506 bool DeadVCC = !VCCOp || VCCOp->
isDead();
2510 unsigned OtherOpIdx =
2511 FIOperandNum == Src0Idx ? FIOperandNum + 1 : Src0Idx;
2514 unsigned Src1Idx = Src0Idx + 1;
2515 Register MaterializedReg = FrameReg;
2518 int64_t
Offset = FrameInfo.getObjectOffset(Index);
2522 if (OtherOp->
isImm()) {
2533 OtherOp->
setImm(TotalOffset);
2546 AMDGPU::VGPR_32RegClass,
MI,
false, 0);
2554 MaterializedReg = ScavengedVGPR;
2557 if ((!OtherOp->
isImm() || OtherOp->
getImm() != 0) && MaterializedReg) {
2559 !
TII->isOperandLegal(*
MI, Src1Idx, OtherOp)) {
2566 if (!ScavengedVGPR) {
2568 AMDGPU::VGPR_32RegClass,
MI,
false,
2572 assert(ScavengedVGPR != DstReg);
2577 MaterializedReg = ScavengedVGPR;
2586 AddI32.
add(
MI->getOperand(1));
2588 unsigned MaterializedRegFlags =
2591 if (
isVGPRClass(getPhysRegBaseClass(MaterializedReg))) {
2596 .addReg(MaterializedReg, MaterializedRegFlags);
2601 .addReg(MaterializedReg, MaterializedRegFlags)
2605 if (
MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
2606 MI->getOpcode() == AMDGPU::V_ADD_U32_e64)
2609 if (
MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e32)
2610 AddI32.setOperandDead(3);
2612 MaterializedReg = DstReg;
2618 }
else if (
Offset != 0) {
2619 assert(!MaterializedReg);
2623 if (DeadVCC && !HasClamp) {
2628 if (OtherOp->
isReg() && OtherOp->
getReg() == DstReg) {
2630 MI->eraseFromParent();
2635 MI->setDesc(
TII->get(AMDGPU::V_MOV_B32_e32));
2636 MI->removeOperand(FIOperandNum);
2638 unsigned NumOps =
MI->getNumOperands();
2639 for (
unsigned I = NumOps - 2;
I >= NumDefs + 1; --
I)
2640 MI->removeOperand(
I);
2643 MI->removeOperand(1);
2655 if (!
TII->isOperandLegal(*
MI, Src1Idx) &&
TII->commuteInstruction(*
MI)) {
2663 for (
unsigned SrcIdx : {FIOperandNum, OtherOpIdx}) {
2664 if (!
TII->isOperandLegal(*
MI, SrcIdx)) {
2668 if (!ScavengedVGPR) {
2670 AMDGPU::VGPR_32RegClass,
MI,
false,
2674 assert(ScavengedVGPR != DstReg);
2680 Src.ChangeToRegister(ScavengedVGPR,
false);
2681 Src.setIsKill(
true);
2687 if (FIOp->
isImm() && FIOp->
getImm() == 0 && DeadVCC && !HasClamp) {
2688 if (OtherOp->
isReg() && OtherOp->
getReg() != DstReg) {
2692 MI->eraseFromParent();
  case AMDGPU::S_ADD_I32: {

    unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;

    Register MaterializedReg = FrameReg;

    bool DeadSCC = MI->getOperand(3).isDead();

        MaterializedReg = TmpReg;

    int64_t Offset = FrameInfo.getObjectOffset(Index);

    if (OtherOp.isImm()) {

      if (MaterializedReg)

    } else if (MaterializedReg) {

      if (!TmpReg && MaterializedReg == FrameReg) {

      MaterializedReg = DstReg;

    if (DeadSCC && OtherOp.isImm() && OtherOp.getImm() == 0) {

      MI->removeOperand(3);
      MI->removeOperand(OtherOpIdx);
      MI->setDesc(TII->get(FIOp->isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
    } else if (DeadSCC && FIOp->isImm() && FIOp->getImm() == 0) {

      MI->removeOperand(3);
      MI->removeOperand(FIOperandNum);

          TII->get(OtherOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
2797 int64_t
Offset = FrameInfo.getObjectOffset(Index);
2799 if (
TII->isFLATScratch(*
MI)) {
2801 (int16_t)FIOperandNum ==
2809 TII->getNamedOperand(*
MI, AMDGPU::OpName::offset);
2813 OffsetOp->
setImm(NewOffset);
2820 unsigned Opc =
MI->getOpcode();
2835 bool TiedVDst = VDstIn != -1 &&
MI->getOperand(VDstIn).isReg() &&
2836 MI->getOperand(VDstIn).isTied();
2838 MI->untieRegOperand(VDstIn);
2848 assert(NewVDst != -1 && NewVDstIn != -1 &&
"Must be tied!");
2849 MI->tieOperands(NewVDst, NewVDstIn);
2851 MI->setDesc(
TII->get(NewOpc));
2859 if (
TII->isImmOperandLegal(*
MI, FIOperandNum, *FIOp))
2866 bool UseSGPR =
TII->isOperandLegal(*
MI, FIOperandNum, FIOp);
2868 if (!
Offset && FrameReg && UseSGPR) {
2874 UseSGPR ? &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass;
2881 if ((!FrameReg || !
Offset) && TmpReg) {
2882 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
2885 MIB.addReg(FrameReg);
2892 bool NeedSaveSCC = RS->
isRegUsed(AMDGPU::SCC) &&
2893 !
MI->definesRegister(AMDGPU::SCC,
nullptr);
2898 MI,
false, 0, !UseSGPR);
2902 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
2913 assert(!(
Offset & 0x1) &&
"Flat scratch offset must be aligned!");
2933 if (TmpSReg == FrameReg) {
2936 !
MI->registerDefIsDead(AMDGPU::SCC,
nullptr)) {
2960 bool IsMUBUF =
TII->isMUBUF(*
MI);
2966 bool LiveSCC = RS->
isRegUsed(AMDGPU::SCC) &&
2967 !
MI->definesRegister(AMDGPU::SCC,
nullptr);
2969 ? &AMDGPU::SReg_32RegClass
2970 : &AMDGPU::VGPR_32RegClass;
2971 bool IsCopy =
MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
2972 MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
2973 MI->getOpcode() == AMDGPU::S_MOV_B32;
2975 IsCopy ?
MI->getOperand(0).getReg()
2978 int64_t
Offset = FrameInfo.getObjectOffset(Index);
2981 IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32 : AMDGPU::V_LSHRREV_B32_e64;
2983 if (IsSALU && LiveSCC) {
2989 if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
2995 if (IsSALU && !LiveSCC)
2996 Shift.getInstr()->getOperand(3).setIsDead();
2997 if (IsSALU && LiveSCC) {
3004 ResultReg = NewDest;
3009 if ((MIB =
TII->getAddNoCarry(*
MBB,
MI,
DL, ResultReg, *RS)) !=
3019 const bool IsVOP2 = MIB->
getOpcode() == AMDGPU::V_ADD_U32_e32;
3031 "Need to reuse carry out register");
3036 ConstOffsetReg = getSubReg(MIB.
getReg(1), AMDGPU::sub0);
3038 ConstOffsetReg = MIB.
getReg(1);
3049 if (!MIB || IsSALU) {
3056 Register TmpScaledReg = IsCopy && IsSALU
3059 AMDGPU::SReg_32_XM0RegClass,
MI,
3061 Register ScaledReg = TmpScaledReg.
isValid() ? TmpScaledReg : FrameReg;
3073 AMDGPU::VGPR_32RegClass,
MI,
false, 0,
true);
3076 if ((
Add =
TII->getAddNoCarry(*
MBB,
MI,
DL, TmpResultReg, *RS))) {
3081 if (
Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
3091 "offset is unsafe for v_mad_u32_u24");
3100 bool IsInlinableLiteral =
3102 if (!IsInlinableLiteral) {
3111 if (!IsInlinableLiteral) {
3124 Register NewDest = IsCopy ? ResultReg
3126 AMDGPU::SReg_32RegClass, *
Add,
3131 ResultReg = NewDest;
3137 ResultReg = TmpResultReg;
3139 if (!TmpScaledReg.
isValid()) {
3152 MI->eraseFromParent();
3162 static_cast<int>(FIOperandNum) ==
3165 auto &SOffset = *
TII->getNamedOperand(*
MI, AMDGPU::OpName::soffset);
3166 assert((SOffset.isImm() && SOffset.getImm() == 0));
3168 if (FrameReg != AMDGPU::NoRegister)
3169 SOffset.ChangeToRegister(FrameReg,
false);
3171 int64_t
Offset = FrameInfo.getObjectOffset(Index);
3173 TII->getNamedOperand(*
MI, AMDGPU::OpName::offset)->getImm();
3174 int64_t NewOffset = OldImm +
Offset;
3176 if (
TII->isLegalMUBUFImmOffset(NewOffset) &&
3178 MI->eraseFromParent();
3187 if (!
TII->isImmOperandLegal(*
MI, FIOperandNum, *FIOp)) {
3209 return &AMDGPU::VReg_64RegClass;
3211 return &AMDGPU::VReg_96RegClass;
3213 return &AMDGPU::VReg_128RegClass;
3215 return &AMDGPU::VReg_160RegClass;
3217 return &AMDGPU::VReg_192RegClass;
3219 return &AMDGPU::VReg_224RegClass;
3221 return &AMDGPU::VReg_256RegClass;
3223 return &AMDGPU::VReg_288RegClass;
3225 return &AMDGPU::VReg_320RegClass;
3227 return &AMDGPU::VReg_352RegClass;
3229 return &AMDGPU::VReg_384RegClass;
3231 return &AMDGPU::VReg_512RegClass;
3233 return &AMDGPU::VReg_1024RegClass;
3241 return &AMDGPU::VReg_64_Align2RegClass;
3243 return &AMDGPU::VReg_96_Align2RegClass;
3245 return &AMDGPU::VReg_128_Align2RegClass;
3247 return &AMDGPU::VReg_160_Align2RegClass;
3249 return &AMDGPU::VReg_192_Align2RegClass;
3251 return &AMDGPU::VReg_224_Align2RegClass;
3253 return &AMDGPU::VReg_256_Align2RegClass;
3255 return &AMDGPU::VReg_288_Align2RegClass;
3257 return &AMDGPU::VReg_320_Align2RegClass;
3259 return &AMDGPU::VReg_352_Align2RegClass;
3261 return &AMDGPU::VReg_384_Align2RegClass;
3263 return &AMDGPU::VReg_512_Align2RegClass;
3265 return &AMDGPU::VReg_1024_Align2RegClass;
3273 return &AMDGPU::VReg_1RegClass;
3275 return &AMDGPU::VGPR_16RegClass;
3277 return &AMDGPU::VGPR_32RegClass;
3285 return &AMDGPU::AReg_64RegClass;
3287 return &AMDGPU::AReg_96RegClass;
3289 return &AMDGPU::AReg_128RegClass;
3291 return &AMDGPU::AReg_160RegClass;
3293 return &AMDGPU::AReg_192RegClass;
3295 return &AMDGPU::AReg_224RegClass;
3297 return &AMDGPU::AReg_256RegClass;
3299 return &AMDGPU::AReg_288RegClass;
3301 return &AMDGPU::AReg_320RegClass;
3303 return &AMDGPU::AReg_352RegClass;
3305 return &AMDGPU::AReg_384RegClass;
3307 return &AMDGPU::AReg_512RegClass;
3309 return &AMDGPU::AReg_1024RegClass;
3317 return &AMDGPU::AReg_64_Align2RegClass;
3319 return &AMDGPU::AReg_96_Align2RegClass;
3321 return &AMDGPU::AReg_128_Align2RegClass;
3323 return &AMDGPU::AReg_160_Align2RegClass;
3325 return &AMDGPU::AReg_192_Align2RegClass;
3327 return &AMDGPU::AReg_224_Align2RegClass;
3329 return &AMDGPU::AReg_256_Align2RegClass;
3331 return &AMDGPU::AReg_288_Align2RegClass;
3333 return &AMDGPU::AReg_320_Align2RegClass;
3335 return &AMDGPU::AReg_352_Align2RegClass;
3337 return &AMDGPU::AReg_384_Align2RegClass;
3339 return &AMDGPU::AReg_512_Align2RegClass;
3341 return &AMDGPU::AReg_1024_Align2RegClass;
3349 return &AMDGPU::AGPR_LO16RegClass;
3351 return &AMDGPU::AGPR_32RegClass;
3359 return &AMDGPU::AV_64RegClass;
3361 return &AMDGPU::AV_96RegClass;
3363 return &AMDGPU::AV_128RegClass;
3365 return &AMDGPU::AV_160RegClass;
3367 return &AMDGPU::AV_192RegClass;
3369 return &AMDGPU::AV_224RegClass;
3371 return &AMDGPU::AV_256RegClass;
3373 return &AMDGPU::AV_288RegClass;
3375 return &AMDGPU::AV_320RegClass;
3377 return &AMDGPU::AV_352RegClass;
3379 return &AMDGPU::AV_384RegClass;
3381 return &AMDGPU::AV_512RegClass;
3383 return &AMDGPU::AV_1024RegClass;
3391 return &AMDGPU::AV_64_Align2RegClass;
3393 return &AMDGPU::AV_96_Align2RegClass;
3395 return &AMDGPU::AV_128_Align2RegClass;
3397 return &AMDGPU::AV_160_Align2RegClass;
3399 return &AMDGPU::AV_192_Align2RegClass;
3401 return &AMDGPU::AV_224_Align2RegClass;
3403 return &AMDGPU::AV_256_Align2RegClass;
3405 return &AMDGPU::AV_288_Align2RegClass;
3407 return &AMDGPU::AV_320_Align2RegClass;
3409 return &AMDGPU::AV_352_Align2RegClass;
3411 return &AMDGPU::AV_384_Align2RegClass;
3413 return &AMDGPU::AV_512_Align2RegClass;
3415 return &AMDGPU::AV_1024_Align2RegClass;
3423 return &AMDGPU::AV_32RegClass;
3432 return &AMDGPU::SGPR_LO16RegClass;
3434 return &AMDGPU::SReg_32RegClass;
3436 return &AMDGPU::SReg_64RegClass;
3438 return &AMDGPU::SGPR_96RegClass;
3440 return &AMDGPU::SGPR_128RegClass;
3442 return &AMDGPU::SGPR_160RegClass;
3444 return &AMDGPU::SGPR_192RegClass;
3446 return &AMDGPU::SGPR_224RegClass;
3448 return &AMDGPU::SGPR_256RegClass;
3450 return &AMDGPU::SGPR_288RegClass;
3452 return &AMDGPU::SGPR_320RegClass;
3454 return &AMDGPU::SGPR_352RegClass;
3456 return &AMDGPU::SGPR_384RegClass;
3458 return &AMDGPU::SGPR_512RegClass;
3460 return &AMDGPU::SGPR_1024RegClass;
3468 if (Reg.isVirtual())
3469 RC =
MRI.getRegClass(Reg);
3471 RC = getPhysRegBaseClass(Reg);
3477 unsigned Size = getRegSizeInBits(*SRC);
3479 assert(VRC &&
"Invalid register class size");
3485 unsigned Size = getRegSizeInBits(*SRC);
3487 assert(ARC &&
"Invalid register class size");
3493 unsigned Size = getRegSizeInBits(*VRC);
3495 return &AMDGPU::SGPR_32RegClass;
3497 assert(SRC &&
"Invalid register class size");
3504 unsigned SubIdx)
const {
3507 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
3508 return MatchRC && MatchRC->
hasSubClassEq(SuperRC) ? MatchRC :
nullptr;
3524 unsigned SrcSubReg)
const {
3541 return getCommonSubClass(DefRC, SrcRC) !=
nullptr;
3557 if (ReserveHighestRegister) {
3559 if (
MRI.isAllocatable(Reg) && !
MRI.isPhysRegUsed(Reg))
3563 if (
MRI.isAllocatable(Reg) && !
MRI.isPhysRegUsed(Reg))
3580 unsigned EltSize)
const {
3582 assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
3584 const unsigned RegDWORDs = RegBitWidth / 32;
3585 const unsigned EltDWORDs = EltSize / 4;
3586 assert(RegSplitParts.size() + 1 >= EltDWORDs);
3588 const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
3589 const unsigned NumParts = RegDWORDs / EltDWORDs;
3591 return ArrayRef(Parts.data(), NumParts);
3597 return Reg.isVirtual() ?
MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
3604 return getSubRegisterClass(SrcRC, MO.
getSubReg());
3629 unsigned SrcSize = getRegSizeInBits(*SrcRC);
3630 unsigned DstSize = getRegSizeInBits(*DstRC);
3631 unsigned NewSize = getRegSizeInBits(*NewRC);
3637 if (SrcSize <= 32 || DstSize <= 32)
3640 return NewSize <= DstSize || NewSize <= SrcSize;
3649 switch (RC->
getID()) {
3651 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
3652 case AMDGPU::VGPR_32RegClassID:
3654 case AMDGPU::SGPR_32RegClassID:
3655 case AMDGPU::SGPR_LO16RegClassID:
3661 unsigned Idx)
const {
3662 if (
Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
3663 Idx == AMDGPU::RegisterPressureSets::AGPR_32)
3667 if (
Idx == AMDGPU::RegisterPressureSets::SReg_32)
3675 static const int Empty[] = { -1 };
3677 if (RegPressureIgnoredUnits[RegUnit])
3680 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
3685 return AMDGPU::SGPR30_SGPR31;
3691 switch (RB.
getID()) {
3692 case AMDGPU::VGPRRegBankID:
3695 case AMDGPU::VCCRegBankID:
3698 case AMDGPU::SGPRRegBankID:
3700 case AMDGPU::AGPRRegBankID:
3711 if (
const RegisterBank *RB = dyn_cast<const RegisterBank *>(RCOrRB))
3714 if (
const auto *RC = dyn_cast<const TargetRegisterClass *>(RCOrRB))
3715 return getAllocatableClass(RC);
3721 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
3725 return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
3731 : &AMDGPU::VReg_64RegClass;
3736 switch ((
int)RCID) {
3737 case AMDGPU::SReg_1RegClassID:
3739 case AMDGPU::SReg_1_XEXECRegClassID:
3744 return AMDGPUGenRegisterInfo::getRegClass(RCID);
3757 if (Reg.isVirtual()) {
3762 :
MRI.getMaxLaneMaskForVReg(Reg);
3766 if ((S.LaneMask & SubLanes) == SubLanes) {
3767 V = S.getVNInfoAt(UseIdx);
3779 for (
MCRegUnit Unit : regunits(Reg.asMCReg())) {
3794 if (!Def || !MDT.dominates(Def, &
Use))
3797 assert(Def->modifiesRegister(Reg,
this));
3803 assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
3806 AMDGPU::SReg_32RegClass,
3807 AMDGPU::AGPR_32RegClass } ) {
3808 if (
MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
3811 if (
MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
3812 &AMDGPU::VGPR_32RegClass)) {
3816 return AMDGPU::NoRegister;
3839 unsigned Size = getRegSizeInBits(*RC);
3873 return std::min(128u, getSubRegIdxSize(
SubReg));
3877 return std::min(32u, getSubRegIdxSize(
SubReg));
3888 if (
MRI.isPhysRegUsed(Reg))
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static int getOffenMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyAGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFLoad(unsigned Opc)
static const std::array< unsigned, 17 > SubRegFromChannelTableWidthMap
static void emitUnsupportedError(const Function &Fn, const MachineInstr &MI, const Twine &ErrMsg)
static const TargetRegisterClass * getAlignedAGPRClassForBitWidth(unsigned BitWidth)
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize)
static const TargetRegisterClass * getAlignedVGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyVGPRClassForBitWidth(unsigned BitWidth)
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling SGPRs to VGPRs"), cl::ReallyHidden, cl::init(true))
static unsigned getNumSubRegsForSpillOp(unsigned Op)
static const TargetRegisterClass * getAlignedVectorSuperClassForBitWidth(unsigned BitWidth)
static const TargetRegisterClass * getAnyVectorSuperClassForBitWidth(unsigned BitWidth)
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, bool IsKill)
static bool isFIPlusImmOrVGPR(const SIRegisterInfo &TRI, const MachineInstr &MI)
static int getOffenMUBUFLoad(unsigned Opc)
Interface definition for SIRegisterInfo.
static const char * getRegisterName(MCRegister Reg)
uint32_t getLDSSize() const
bool isBottomOfStack() const
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
unsigned getWavefrontSizeLog2() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
bool test(unsigned Idx) const
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
bool empty() const
empty - Tests whether there are no bits in this bitvector.
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
const SIInstrInfo * getInstrInfo() const override
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool enableFlatScratch() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
const SIFrameLowering * getFrameLowering() const override
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasVOP3Literal() const
bool hasFlatScratchSTMode() const
unsigned getMaxWaveScratchSize() const
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasSubRanges() const
Returns true if subregister liveness information is available.
iterator_range< subrange_iterator > subranges()
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
bool hasInterval(Register Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
MachineDominatorTree & getDomTree()
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
This class represents the liveness of a register, stack slot, etc.
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
A set of register units used to track register liveness.
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Describe properties that are true of each instruction in the target description file.
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasCalls() const
Return true if the current function has any function calls.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
unsigned getNumFixedObjects() const
Return the number of fixed objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setOperandDead(unsigned OpIdx) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
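The MachineInstrBuilder calls above are normally chained off BuildMI (listed further below). A minimal sketch, assuming TII, MBB, MI, DL and the two registers come from the surrounding spill code and that the usual CodeGen headers are already included; emitSMov is a hypothetical helper, not part of this file:
static void emitSMov(const SIInstrInfo &TII, MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator MI, const DebugLoc &DL,
                     Register DstReg, Register SrcReg, bool IsKill) {
  // Build "DstReg = S_MOV_B32 SrcReg", marking the source killed if requested.
  BuildMI(MBB, MI, DL, TII.get(AMDGPU::S_MOV_B32), DstReg)
      .addReg(SrcReg, getKillRegState(IsKill));
}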
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
void setAsmPrinterFlag(uint8_t Flag)
Set a flag for the AsmPrinter.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
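A hedged sketch combining getMachineMemOperand, MachinePointerInfo::getFixedStack and MachineFrameInfo::getObjectAlign (all listed on this page) to describe an access to a spill slot FI; buildSpillMMO is an illustrative name, not an API of this file:
static MachineMemOperand *buildSpillMMO(MachineFunction &MF, int FI,
                                        bool IsStore, LLT MemTy) {
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand::Flags Flags =
      IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
  // The MMO records what is accessed (the frame slot), how wide the access
  // is (MemTy) and the slot's alignment, so later passes can reason about it.
  return MF.getMachineMemOperand(PtrInfo, Flags, MemTy,
                                 FrameInfo.getObjectAlign(FI));
}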
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
void setImm(int64_t immVal)
void setIsRenamable(bool Val=true)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
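To illustrate the operand mutators above, a sketch of folding a frame-index operand once its address is known, in the spirit of frame-index elimination; the helper name and its parameters are assumptions, not code from this file:
static void foldFrameIndexOperand(MachineOperand &FIOp, bool FoldsToImm,
                                  int64_t Imm, Register BaseReg) {
  assert(FIOp.isFI() && "expected a MO_FrameIndex operand");
  if (FoldsToImm)
    FIOp.ChangeToImmediate(Imm);     // Address collapsed to a known constant.
  else
    FIOp.ChangeToRegister(BaseReg, /*isDef=*/false); // Use a materialized base.
}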
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return true if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void assignRegToScavengingIndex(int FI, Register Reg, MachineInstr *Restore=nullptr)
Record that Reg is in use at scavenging index FI.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the place before To.
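The scavenger entries above compose as in the following sketch, which tries to find a free 32-bit SGPR ending at MI without forcing an emergency spill; the helper is hypothetical and assumes the scavenger has already been positioned in the current block:
static Register scavengeTempSGPR(RegScavenger &RS,
                                 MachineBasicBlock::iterator MI) {
  // Search backwards from MI; with AllowSpill=false a failed search is
  // expected to come back as an invalid Register rather than forcing a spill.
  Register Tmp = RS.scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI,
                                              /*RestoreAfter=*/false,
                                              /*SPAdj=*/0,
                                              /*AllowSpill=*/false);
  if (Tmp)
    RS.setRegUsed(Tmp); // Keep later queries from handing out the same reg.
  return Tmp;
}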
Holds all the information related to register banks.
virtual bool isDivergentRegBank(const RegisterBank *RB) const
Returns true if the register bank is considered divergent.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
static bool isVOP3(const MachineInstr &MI)
static bool isFLATScratch(const MachineInstr &MI)
static bool isMUBUF(const MachineInstr &MI)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
bool usesAGPRs(const MachineFunction &MF) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
Register getLongBranchReservedReg() const
Register getStackPtrOffsetReg() const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const
Register getSGPRForEXECCopy() const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
Register getVGPRForAGPRCopy() const
Register getFrameOffsetReg() const
BitVector getNonWWMRegMask() const
bool checkFlag(Register Reg, uint8_t Flag) const
void addToSpilledVGPRs(unsigned num)
const ReservedRegSet & getWWMReservedRegs() const
void addToSpilledSGPRs(unsigned num)
Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, int64_t Offset) const override
int64_t getScratchInstrOffset(const MachineInstr *MI) const
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const override
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class SubRC with subregister index SubIdx.
ArrayRef< MCPhysReg > getAllSGPR64(const MachineFunction &MF) const
Return all SGPR64 which satisfy the waves per execution unit requirement of the subtarget.
MCRegister findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF, bool ReserveHighestVGPR=false) const
Returns the lowest register that is not used at any point in the function.
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool shouldRealignStack(const MachineFunction &MF) const override
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Register getFrameRegister(const MachineFunction &MF) const override
LLVM_READONLY const TargetRegisterClass * getVectorSuperClassForBitWidth(unsigned BitWidth) const
bool spillEmergencySGPR(MachineBasicBlock::iterator MI, MachineBasicBlock &RestoreMBB, Register SGPR, RegScavenger *RS) const
SIRegisterInfo(const GCNSubtarget &ST)
const uint32_t * getAllVGPRRegMask() const
MCRegister getReturnAddressReg(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
bool hasBasePointer(const MachineFunction &MF) const
const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override
Returns a legal register class to copy a register in the specified class to or from.
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
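As a usage sketch for getRegSplitParts just above: splitting a wide register into EltSize-byte pieces yields one sub-register index per piece, which spill and copy code then visits in order. The callback-based helper below is illustrative only and assumes the class is wider than EltSize so a real split occurs:
static void forEachRegPiece(const SIRegisterInfo &TRI,
                            const TargetRegisterClass *RC, Register SuperReg,
                            unsigned EltSize,
                            function_ref<void(Register, int16_t)> Visit) {
  ArrayRef<int16_t> Parts = TRI.getRegSplitParts(RC, EltSize);
  for (int16_t SubIdx : Parts)
    Visit(SuperReg, SubIdx); // SubIdx selects one EltSize-wide piece of SuperReg.
}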
ArrayRef< MCPhysReg > getAllSGPR32(const MachineFunction &MF) const
Return all SGPR32 which satisfy the waves per execution unit requirement of the subtarget.
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool SpillToPhysVGPRLane=false) const
Special case of eliminateFrameIndex.
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
void buildSpillLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO, RegScavenger *RS, LiveRegUnits *LiveUnits=nullptr) const
std::pair< unsigned, unsigned > getMaxNumVectorRegs(const MachineFunction &MF) const
Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit requirement of the subtarget.
bool isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const override
LLVM_READONLY const TargetRegisterClass * getAGPRClassForBitWidth(unsigned BitWidth) const
static bool isChainScratchRegister(Register VGPR)
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register Reg) const override
const uint32_t * getNoPreservedMask() const override
StringRef getRegAsmName(MCRegister Reg) const override
const uint32_t * getAllAllocatableSRegMask() const
MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF, const unsigned Align, const TargetRegisterClass *RC) const
Return the largest available SGPR aligned to Align for the register class RC.
unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI, const TargetRegisterClass &RC) const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
const uint32_t * getAllVectorRegMask() const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const
bool opCanUseLiteralConstant(unsigned OpType) const
Register getBaseRegister() const
LLVM_READONLY const TargetRegisterClass * getVGPRClassForBitWidth(unsigned BitWidth) const
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
MachineInstr * findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, MachineRegisterInfo &MRI, LiveIntervals *LIS) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
SmallVector< StringLiteral > getVRegFlagsOfReg(Register Reg, const MachineFunction &MF) const override
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
ArrayRef< MCPhysReg > getAllSGPR128(const MachineFunction &MF) const
Return all SGPR128 which satisfy the waves per execution unit requirement of the subtarget.
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
const TargetRegisterClass * getRegClassForOperandReg(const MachineRegisterInfo &MRI, const MachineOperand &MO) const
const uint32_t * getAllAGPRRegMask() const
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
const TargetRegisterClass * getBoolRC() const
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
If OnlyToVGPR is true, this will only succeed if it manages to find a free VGPR lane to spill to.
MCRegister getExec() const
MCRegister getVCC() const
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
bool isVectorSuperClass(const TargetRegisterClass *RC) const
const TargetRegisterClass * getWaveMaskRegClass() const
unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC, unsigned SubReg) const
void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
const TargetRegisterClass * getVGPR64Class() const
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill=true) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
const int * getRegUnitPressureSets(unsigned RegUnit) const override
SlotIndex - An opaque wrapper around machine indexes.
bool isValid() const
Returns true if this is a valid index.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
SlotIndex replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
ReplaceMachineInstrInMaps - Replacing a machine instr with a new one in maps used by the register allocator.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
const uint8_t TSFlags
Configurable target specific flags.
ArrayRef< MCPhysReg > getRegisters() const
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same spill size.
virtual bool shouldRealignStack(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal calling convention calls for.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
A Use represents the edge between a Value definition and its users.
VNInfo - Value Number Information.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ PRIVATE_ADDRESS
Address space for private memory.
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSVfromSVS(uint16_t Opcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
@ OPERAND_REG_INLINE_AC_FIRST
@ OPERAND_REG_INLINE_AC_LAST
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is not commonly executed.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Renamable
Register that may be renamed.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
auto reverse(ContainerTy &&C)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
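Since alignDown and divideCeil carry most of the offset arithmetic in code like this, two concrete illustrative evaluations follow; both are checkable at compile time because the listing above declares the helpers constexpr (they live in llvm/Support/MathExtras.h):
static_assert(alignDown(37u, 16u) == 32u,
              "37 rounded down to a 16-byte boundary is 32");
static_assert(divideCeil(96u, 32u) == 3u,
              "a 96-bit value needs three 32-bit parts");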
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
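call_once is the usual way to guard lazy, one-time initialization of static tables; a minimal self-contained sketch with illustrative names:
#include "llvm/Support/Threading.h"
#include <array>

static llvm::once_flag SquaresInitFlag;
static std::array<unsigned, 32> Squares;

static void initSquares() {
  // The lambda runs exactly once even if initSquares() races on many threads.
  llvm::call_once(SquaresInitFlag, [] {
    for (unsigned I = 0; I < Squares.size(); ++I)
      Squares[I] = I * I;
  });
}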
constexpr unsigned BitWidth
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI)
ArrayRef< int16_t > SplitParts
SIMachineFunctionInfo & MFI
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, Register Reg, bool IsKill, int Index, RegScavenger *RS)
PerVGPRData getPerVGPRData()
MachineBasicBlock::iterator MI
void readWriteTmpVGPR(unsigned Offset, bool IsLoad)
const SIRegisterInfo & TRI
The llvm::once_flag structure.
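Finally, a hedged sketch of the arithmetic behind getPerVGPRData above: each wave lane of a VGPR can hold one 32-bit SGPR part, so a wave32 target packs 32 parts per VGPR and a wave64 target packs 64. The struct and function below are illustrative only, not the actual implementation:
struct PerVGPRDataSketch {
  unsigned PerVGPR;  // 32-bit SGPR parts that fit in one VGPR (one per lane).
  unsigned NumVGPRs; // VGPRs needed to hold all NumSubRegs parts.
};

static PerVGPRDataSketch computePerVGPRData(bool IsWave32,
                                            unsigned NumSubRegs) {
  PerVGPRDataSketch D;
  D.PerVGPR = IsWave32 ? 32 : 64;
  D.NumVGPRs = (NumSubRegs + D.PerVGPR - 1) / D.PerVGPR; // ceil division
  return D;
}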