#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"

static cl::opt<bool> EnableSpillSGPRToVGPR(
    "amdgpu-spill-sgpr-to-vgpr",
    cl::desc("Enable spilling SGPRs to VGPRs"),
    cl::ReallyHidden, cl::init(true));

std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;

// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
struct SGPRSpillBuilder {
  // ...
  SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
                   bool IsWave32, MachineBasicBlock::iterator MI, int Index,
                   RegScavenger *RS)
      : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(),
                         MI->getOperand(0).isKill(), Index, RS) {}

  // ...
    if (IsWave32) {
      ExecReg = AMDGPU::EXEC_LO;
      MovOpc = AMDGPU::S_MOV_B32;
      NotOpc = AMDGPU::S_NOT_B32;
    } else {
      ExecReg = AMDGPU::EXEC;
      MovOpc = AMDGPU::S_MOV_B64;
      NotOpc = AMDGPU::S_NOT_B64;
    }

    assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
           SuperReg != AMDGPU::EXEC && "exec should never spill");

  // ...
    assert(RS && "Cannot spill SGPR to memory without RegScavenger");
    // ...
    const TargetRegisterClass &RC =
        IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
    // ...
    MI->emitError("unhandled SGPR spill to memory");
    // ...
    I->getOperand(2).setIsDead(); // Mark SCC as dead.
    // ...
    I->getOperand(2).setIsDead(); // Mark SCC as dead.
    // ...
    MI->emitError("unhandled SGPR spill to memory");
  // ...
};
SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
    : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()),
      ST(ST), SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {

  assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
         getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
         (getSubRegIndexLaneMask(AMDGPU::lo16) |
          getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
             getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
         "getNumCoveredRegs() will not work with generated subreg masks!");

  RegPressureIgnoredUnits.resize(getNumRegUnits());
  RegPressureIgnoredUnits.set(*regunits(MCRegister::from(AMDGPU::M0)).begin());
  for (auto Reg : AMDGPU::VGPR_16RegClass) {
    if (AMDGPU::isHi16Reg(Reg, *this))
      RegPressureIgnoredUnits.set(*regunits(Reg).begin());
  }

  // HACK: Until this is fully tablegen'd.
  static llvm::once_flag InitializeRegSplitPartsFlag;

  static auto InitializeRegSplitPartsOnce = [this]() {
    for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
      unsigned Size = getSubRegIdxSize(Idx);
      if (Size & 31)
        continue;
      std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
      unsigned Pos = getSubRegIdxOffset(Idx);
      if (Pos % Size)
        continue;
      Pos /= Size;
      if (Vec.empty()) {
        unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits.
        Vec.resize(MaxNumParts);
      }
      Vec[Pos] = Idx;
    }
  };

  static llvm::once_flag InitializeSubRegFromChannelTableFlag;

  static auto InitializeSubRegFromChannelTableOnce = [this]() {
    for (auto &Row : SubRegFromChannelTable)
      Row.fill(AMDGPU::NoSubRegister);
    for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
      unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
      unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
      assert(Width < SubRegFromChannelTableWidthMap.size());
      Width = SubRegFromChannelTableWidthMap[Width];
      if (Width == 0)
        continue;
      unsigned TableIdx = Width - 1;
      assert(TableIdx < SubRegFromChannelTable.size());
      assert(Offset < SubRegFromChannelTable[TableIdx].size());
      SubRegFromChannelTable[TableIdx][Offset] = Idx;
    }
  };

  llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
  llvm::call_once(InitializeSubRegFromChannelTableFlag,
                  InitializeSubRegFromChannelTableOnce);
}
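// --- Illustrative aside (not part of SIRegisterInfo.cpp) ---------------------
// A minimal standalone sketch of the lookup scheme built above: a width in
// 32-bit channels maps through SubRegFromChannelTableWidthMap onto a compact
// row index, and each row is keyed by the starting channel. The subreg index
// value (42) is an invented stand-in for a generated AMDGPU subregister index
// such as sub8_sub9_sub10_sub11.
#if 0 // standalone sketch; compile separately
#include <array>
#include <cassert>
#include <cstdint>
#include <iostream>

static const std::array<unsigned, 17> WidthMap = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};

int main() {
  std::array<std::array<uint16_t, 32>, 9> Table{}; // rows: widths 1..8 and 16
  unsigned Width = 4, Offset = 8, Idx = 42; // a 4-dword tuple at channel 8
  Table[WidthMap[Width] - 1][Offset] = Idx;

  // Mirrors SIRegisterInfo::getSubRegFromChannel(Channel, NumRegs).
  unsigned NumRegs = 4, Channel = 8;
  unsigned Row = WidthMap[NumRegs];
  assert(Row && "Not implemented");
  std::cout << Table[Row - 1][Channel] << '\n'; // prints 42
}
#endif
// ------------------------------------------------------------------------------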
const MCPhysReg *
SIRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  switch (MF->getFunction().getCallingConv()) {
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList
                               : CSR_AMDGPU_SaveList;
  case CallingConv::AMDGPU_Gfx:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
                               : CSR_AMDGPU_SI_Gfx_SaveList;
  case CallingConv::AMDGPU_CS_ChainPreserve:
    return CSR_AMDGPU_CS_ChainPreserve_SaveList;
  default: {
    // Dummy to not crash RegisterClassInfo.
    static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
    return &NoCalleeSavedReg;
  }
  }
}

const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                                     CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask
                               : CSR_AMDGPU_RegMask;
  case CallingConv::AMDGPU_Gfx:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
                               : CSR_AMDGPU_SI_Gfx_RegMask;
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // Calls to these functions never return, so we can pretend everything is
    // preserved.
    return AMDGPU_AllVGPRs_RegMask;
  default:
    return nullptr;
  }
}

const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
  return CSR_AMDGPU_NoRegs_RegMask;
}

bool SIRegisterInfo::isChainScratchRegister(Register VGPR) {
  return VGPR >= AMDGPU::VGPR0 && VGPR < AMDGPU::VGPR8;
}
const TargetRegisterClass *
SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                          const MachineFunction &MF) const {
  if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
    return &AMDGPU::AV_32RegClass;
  if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
    return &AMDGPU::AV_64RegClass;
  if (RC == &AMDGPU::VReg_64_Align2RegClass ||
      RC == &AMDGPU::AReg_64_Align2RegClass)
    return &AMDGPU::AV_64_Align2RegClass;
  if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
    return &AMDGPU::AV_96RegClass;
  if (RC == &AMDGPU::VReg_96_Align2RegClass ||
      RC == &AMDGPU::AReg_96_Align2RegClass)
    return &AMDGPU::AV_96_Align2RegClass;
  if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
    return &AMDGPU::AV_128RegClass;
  if (RC == &AMDGPU::VReg_128_Align2RegClass ||
      RC == &AMDGPU::AReg_128_Align2RegClass)
    return &AMDGPU::AV_128_Align2RegClass;
  if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
    return &AMDGPU::AV_160RegClass;
  if (RC == &AMDGPU::VReg_160_Align2RegClass ||
      RC == &AMDGPU::AReg_160_Align2RegClass)
    return &AMDGPU::AV_160_Align2RegClass;
  if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
    return &AMDGPU::AV_192RegClass;
  if (RC == &AMDGPU::VReg_192_Align2RegClass ||
      RC == &AMDGPU::AReg_192_Align2RegClass)
    return &AMDGPU::AV_192_Align2RegClass;
  if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
    return &AMDGPU::AV_256RegClass;
  if (RC == &AMDGPU::VReg_256_Align2RegClass ||
      RC == &AMDGPU::AReg_256_Align2RegClass)
    return &AMDGPU::AV_256_Align2RegClass;
  if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
    return &AMDGPU::AV_512RegClass;
  if (RC == &AMDGPU::VReg_512_Align2RegClass ||
      RC == &AMDGPU::AReg_512_Align2RegClass)
    return &AMDGPU::AV_512_Align2RegClass;
  if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
    return &AMDGPU::AV_1024RegClass;
  if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
      RC == &AMDGPU::AReg_1024_Align2RegClass)
    return &AMDGPU::AV_1024_Align2RegClass;
  // ...
}
const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
  return AMDGPU_AllVGPRs_RegMask;
}

const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const {
  return AMDGPU_AllAGPRs_RegMask;
}

const uint32_t *SIRegisterInfo::getAllVectorRegMask() const {
  return AMDGPU_AllVectorRegs_RegMask;
}

const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
  return AMDGPU_AllAllocatableSRegs_RegMask;
}

unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
                                              unsigned NumRegs) {
  assert(NumRegs < SubRegFromChannelTableWidthMap.size());
  unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
  assert(NumRegIndex && "Not implemented");
  assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
  return SubRegFromChannelTable[NumRegIndex - 1][Channel];
}

MCRegister
SIRegisterInfo::getAlignedHighSGPRForRC(const MachineFunction &MF,
                                        const unsigned Align,
                                        const TargetRegisterClass *RC) const {
  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), Align) - Align;
  MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
}
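// --- Illustrative aside (not part of SIRegisterInfo.cpp) ---------------------
// Hedged sketch of the arithmetic in getAlignedHighSGPRForRC: alignDown rounds
// the SGPR budget down to a multiple of Align, and backing off one Align-sized
// step yields the highest aligned base index. The budget value is invented.
#if 0 // standalone sketch; compile separately
#include <cstdio>

static unsigned alignDownTo(unsigned Value, unsigned Align) {
  return Value - (Value % Align); // same result as llvm::alignDown here
}

int main() {
  unsigned MaxNumSGPRs = 102, Align = 4;
  unsigned BaseIdx = alignDownTo(MaxNumSGPRs, Align) - Align; // 100 - 4 == 96
  std::printf("highest aligned base SGPR index: %u\n", BaseIdx);
}
#endif
// ------------------------------------------------------------------------------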
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());

  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
  // this seems likely to result in bugs, so force all regs to be dead.
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
  reserveRegisterTuples(Reserved, AMDGPU::M0);

  // Reserve src_vccz, src_execz, src_scc.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);

  // Reserve the memory aperture registers.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);

  // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);

  // Reserve xnack_mask registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);

  // Reserve lds_direct register - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);

  // Reserve Trap Handler registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);

  // Reserve null register - it shall never be allocated.
  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);

  // Reserve SGPRs above the addressable limit for this function.
  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (const TargetRegisterClass *RC : regclasses()) {
    if (RC->isBaseClass() && isSGPRClass(RC)) {
      unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
      for (MCPhysReg Reg : *RC) {
        unsigned Index = getHWRegIndex(Reg);
        if (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs)
          Reserved.set(Reg);
      }
    }
  }
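// --- Illustrative aside (not part of SIRegisterInfo.cpp) ---------------------
// Standalone sketch of the out-of-budget check above: a tuple of NumRegs
// dwords starting at hardware index Index is reserved when it overruns the
// per-function SGPR budget. All numbers are invented for illustration.
#if 0 // standalone sketch; compile separately
#include <cstdio>

static unsigned divideCeil(unsigned N, unsigned D) { return (N + D - 1) / D; }

int main() {
  unsigned MaxNumSGPRs = 100, TotalNumSGPRs = 106;
  unsigned RegSizeInBits = 256; // an 8-dword SGPR tuple
  unsigned NumRegs = divideCeil(RegSizeInBits, 32);
  const unsigned Tests[] = {90, 94, 104};
  for (unsigned Index : Tests)
    std::printf("index %u: %s\n", Index,
                (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs)
                    ? "reserved"
                    : "allocatable");
}
#endif
// ------------------------------------------------------------------------------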
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  Register ScratchRSrcReg = MFI->getScratchRSrcReg();
  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
    // need to spill.
    reserveRegisterTuples(Reserved, ScratchRSrcReg);
  }

  Register LongBranchReservedReg = MFI->getLongBranchReservedReg();
  if (LongBranchReservedReg)
    reserveRegisterTuples(Reserved, LongBranchReservedReg);

  // We have to assume the SP is needed in case there are calls in the function,
  // which is detected after the function is lowered. If we aren't really going
  // to need SP, don't bother reserving it.
  MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
  if (StackPtrReg) {
    reserveRegisterTuples(Reserved, StackPtrReg);
    assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
  }

  MCRegister FrameReg = MFI->getFrameOffsetReg();
  if (FrameReg) {
    reserveRegisterTuples(Reserved, FrameReg);
    assert(!isSubRegister(ScratchRSrcReg, FrameReg));
  }

  if (hasBasePointer(MF)) {
    MCRegister BasePtrReg = getBaseRegister();
    reserveRegisterTuples(Reserved, BasePtrReg);
    assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
  }

  // SGPR used to preserve EXEC MASK around WWM spill/copy instructions.
  Register ExecCopyReg = MFI->getSGPRForEXECCopy();
  if (ExecCopyReg)
    reserveRegisterTuples(Reserved, ExecCopyReg);

  // Reserve VGPRs/AGPRs. On subtargets with a unified vector register file
  // (gfx90a+), any budget beyond the physical VGPR file is handed to the
  // AGPRs; see the sketch after this block.
  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxNumAGPRs = MaxNumVGPRs;
  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
  // ...
    MaxNumAGPRs = MaxNumVGPRs;
  // ...
  if (MaxNumVGPRs > TotalNumVGPRs) {
    MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
    MaxNumVGPRs = TotalNumVGPRs;
  }
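// --- Illustrative aside (not part of SIRegisterInfo.cpp) ---------------------
// Sketch of the budget split above for subtargets with a unified VGPR/AGPR
// register file: when the requested VGPR count exceeds the physical VGPR file,
// the overflow becomes the AGPR budget. Values are invented.
#if 0 // standalone sketch; compile separately
#include <cstdio>

int main() {
  unsigned TotalNumVGPRs = 256;
  const unsigned Budgets[] = {128, 512};
  for (unsigned Budget : Budgets) {
    unsigned MaxNumVGPRs = Budget;
    unsigned MaxNumAGPRs = MaxNumVGPRs;
    if (MaxNumVGPRs > TotalNumVGPRs) {
      MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
      MaxNumVGPRs = TotalNumVGPRs;
    }
    std::printf("budget %u -> VGPRs %u, AGPRs %u\n", Budget, MaxNumVGPRs,
                MaxNumAGPRs);
  }
}
#endif
// ------------------------------------------------------------------------------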
  for (const TargetRegisterClass *RC : regclasses()) {
    if (RC->isBaseClass() && isVGPRClass(RC)) {
      unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
      for (MCPhysReg Reg : *RC)
        if (getHWRegIndex(Reg) + NumRegs > MaxNumVGPRs)
          Reserved.set(Reg);
    }
  }

  for (const TargetRegisterClass *RC : regclasses()) {
    if (RC->isBaseClass() && isAGPRClass(RC)) {
      unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
      for (MCPhysReg Reg : *RC)
        if (getHWRegIndex(Reg) + NumRegs > MaxNumAGPRs)
          Reserved.set(Reg);
    }
  }

  // Reserve the WWM registers and the registers picked to hold SGPR-to-VGPR
  // and VGPR-to-AGPR spills.
  for (Register Reg : MFI->getWWMReservedRegs())
    reserveRegisterTuples(Reserved, Reg);

  for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
    reserveRegisterTuples(Reserved, Reg);

  for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
    reserveRegisterTuples(Reserved, Reg);

  return Reserved;
}
bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  // On entry or chain functions, the base address is 0, so it can't possibly
  // need any more alignment.
  if (Info->isBottomOfStack())
    return false;

  return TargetRegisterInfo::shouldRealignStack(MF);
}

bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
  if (Info->isEntryFunction()) {
    const MachineFrameInfo &MFI = Fn.getFrameInfo();
    return MFI.hasStackObjects() || MFI.hasCalls();
  }
  // May need scavenger for dealing with callee saved registers.
  return true;
}

int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
  assert(SIInstrInfo::isMUBUF(*MI) || SIInstrInfo::isFLATScratch(*MI));
  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();
}

int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
                                                 int Idx) const {
  // ...
  assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::vaddr) ||
          (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                             AMDGPU::OpName::saddr))) &&
         "Should never see frame index on non-address operand");

  return getScratchInstrOffset(MI);
}

bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
  // ...
  int64_t FullOffset = Offset + getScratchInstrOffset(MI);
  // ...
  return !TII->isLegalMUBUFImmOffset(FullOffset);
}
Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                                      int FrameIdx,
                                                      int64_t Offset) const {
  MachineBasicBlock::iterator Ins = MBB->begin();
  DebugLoc DL; // Defaults to "unknown"
  if (Ins != MBB->end())
    DL = Ins->getDebugLoc();
  // ...
  unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
                                           : AMDGPU::V_MOV_B32_e32;
  Register BaseReg = MRI.createVirtualRegister(
      ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
                             : &AMDGPU::VGPR_32RegClass);
  // ...
  Register OffsetReg =
      MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  Register FIReg = MRI.createVirtualRegister(
      ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
                             : &AMDGPU::VGPR_32RegClass);
  // ...
  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
      .addReg(OffsetReg, RegState::Kill)
      .addReg(FIReg);

  return BaseReg;
}

void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
                                       int64_t Offset) const {
  const SIInstrInfo *TII = ST.getInstrInfo();
  bool IsFlat = TII->isFLATScratch(MI);
  // ...
  MachineOperand *FIOp =
      TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
                                      : AMDGPU::OpName::vaddr);
  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
  int64_t NewOffset = OffsetOp->getImm() + Offset;

  assert(FIOp && FIOp->isFI() && "frame index must be address operand");

  if (IsFlat) {
    assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                  SIInstrFlags::FlatScratch) &&
           "offset should be legal");
    FIOp->ChangeToRegister(BaseReg, false);
    OffsetOp->setImm(NewOffset);
    return;
  }
  // ...
  assert(TII->isLegalMUBUFImmOffset(NewOffset) && "offset should be legal");
  FIOp->ChangeToRegister(BaseReg, false);
  OffsetOp->setImm(NewOffset);
}

bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                        Register BaseReg,
                                        int64_t Offset) const {
  // ...
  int64_t NewOffset = Offset + getScratchInstrOffset(MI);
  // ...
  return TII->isLegalMUBUFImmOffset(NewOffset);
}
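// --- Illustrative aside (not part of SIRegisterInfo.cpp) ---------------------
// Hedged sketch of a MUBUF immediate-offset legality check: on most subtargets
// the MUBUF offset field is a 12-bit unsigned immediate, so anything outside
// [0, 4095] has to go through a register instead. The bound is an assumption
// for illustration; the authoritative check is
// SIInstrInfo::isLegalMUBUFImmOffset.
#if 0 // standalone sketch; compile separately
#include <cstdio>

static bool isLegalMUBUFImmOffsetSketch(long long Offset) {
  return Offset >= 0 && Offset < 4096; // assumed 12-bit unsigned field
}

int main() {
  const long long Offs[] = {0, 4095, 4096, -4};
  for (long long Off : Offs)
    std::printf("%lld: %s\n", Off,
                isLegalMUBUFImmOffsetSketch(Off) ? "imm ok" : "needs register");
}
#endif
// ------------------------------------------------------------------------------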
const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
    const MachineFunction &MF, unsigned Kind) const {
  // This is inaccurate. It depends on the instruction and address space.
  return &AMDGPU::VGPR_32RegClass;
}

const TargetRegisterClass *
SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &AMDGPU::SCC_CLASSRegClass)
    return getWaveMaskRegClass();
  return RC;
}
static unsigned getNumSubRegsForSpillOp(unsigned Op) {
  switch (Op) {
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_V1024_SAVE:
  case AMDGPU::SI_SPILL_V1024_RESTORE:
  case AMDGPU::SI_SPILL_A1024_SAVE:
  case AMDGPU::SI_SPILL_A1024_RESTORE:
  case AMDGPU::SI_SPILL_AV1024_SAVE:
  case AMDGPU::SI_SPILL_AV1024_RESTORE:
    return 32;
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
  case AMDGPU::SI_SPILL_A512_SAVE:
  case AMDGPU::SI_SPILL_A512_RESTORE:
  case AMDGPU::SI_SPILL_AV512_SAVE:
  case AMDGPU::SI_SPILL_AV512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S384_SAVE:
  case AMDGPU::SI_SPILL_S384_RESTORE:
  case AMDGPU::SI_SPILL_V384_SAVE:
  case AMDGPU::SI_SPILL_V384_RESTORE:
  case AMDGPU::SI_SPILL_A384_SAVE:
  case AMDGPU::SI_SPILL_A384_RESTORE:
  case AMDGPU::SI_SPILL_AV384_SAVE:
  case AMDGPU::SI_SPILL_AV384_RESTORE:
    return 12;
  case AMDGPU::SI_SPILL_S352_SAVE:
  case AMDGPU::SI_SPILL_S352_RESTORE:
  case AMDGPU::SI_SPILL_V352_SAVE:
  case AMDGPU::SI_SPILL_V352_RESTORE:
  case AMDGPU::SI_SPILL_A352_SAVE:
  case AMDGPU::SI_SPILL_A352_RESTORE:
  case AMDGPU::SI_SPILL_AV352_SAVE:
  case AMDGPU::SI_SPILL_AV352_RESTORE:
    return 11;
  case AMDGPU::SI_SPILL_S320_SAVE:
  case AMDGPU::SI_SPILL_S320_RESTORE:
  case AMDGPU::SI_SPILL_V320_SAVE:
  case AMDGPU::SI_SPILL_V320_RESTORE:
  case AMDGPU::SI_SPILL_A320_SAVE:
  case AMDGPU::SI_SPILL_A320_RESTORE:
  case AMDGPU::SI_SPILL_AV320_SAVE:
  case AMDGPU::SI_SPILL_AV320_RESTORE:
    return 10;
  case AMDGPU::SI_SPILL_S288_SAVE:
  case AMDGPU::SI_SPILL_S288_RESTORE:
  case AMDGPU::SI_SPILL_V288_SAVE:
  case AMDGPU::SI_SPILL_V288_RESTORE:
  case AMDGPU::SI_SPILL_A288_SAVE:
  case AMDGPU::SI_SPILL_A288_RESTORE:
  case AMDGPU::SI_SPILL_AV288_SAVE:
  case AMDGPU::SI_SPILL_AV288_RESTORE:
    return 9;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_A256_SAVE:
  case AMDGPU::SI_SPILL_A256_RESTORE:
  case AMDGPU::SI_SPILL_AV256_SAVE:
  case AMDGPU::SI_SPILL_AV256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_V224_SAVE:
  case AMDGPU::SI_SPILL_V224_RESTORE:
  case AMDGPU::SI_SPILL_A224_SAVE:
  case AMDGPU::SI_SPILL_A224_RESTORE:
  case AMDGPU::SI_SPILL_AV224_SAVE:
  case AMDGPU::SI_SPILL_AV224_RESTORE:
    return 7;
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_V192_SAVE:
  case AMDGPU::SI_SPILL_V192_RESTORE:
  case AMDGPU::SI_SPILL_A192_SAVE:
  case AMDGPU::SI_SPILL_A192_RESTORE:
  case AMDGPU::SI_SPILL_AV192_SAVE:
  case AMDGPU::SI_SPILL_AV192_RESTORE:
    return 6;
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_V160_SAVE:
  case AMDGPU::SI_SPILL_V160_RESTORE:
  case AMDGPU::SI_SPILL_A160_SAVE:
  case AMDGPU::SI_SPILL_A160_RESTORE:
  case AMDGPU::SI_SPILL_AV160_SAVE:
  case AMDGPU::SI_SPILL_AV160_RESTORE:
    return 5;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_A128_SAVE:
  case AMDGPU::SI_SPILL_A128_RESTORE:
  case AMDGPU::SI_SPILL_AV128_SAVE:
  case AMDGPU::SI_SPILL_AV128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_A96_SAVE:
  case AMDGPU::SI_SPILL_A96_RESTORE:
  case AMDGPU::SI_SPILL_AV96_SAVE:
  case AMDGPU::SI_SPILL_AV96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_A64_SAVE:
  case AMDGPU::SI_SPILL_A64_RESTORE:
  case AMDGPU::SI_SPILL_AV64_SAVE:
  case AMDGPU::SI_SPILL_AV64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_A32_SAVE:
  case AMDGPU::SI_SPILL_A32_RESTORE:
  case AMDGPU::SI_SPILL_AV32_SAVE:
  case AMDGPU::SI_SPILL_AV32_RESTORE:
  case AMDGPU::SI_SPILL_WWM_V32_SAVE:
  case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
  case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
  case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
    return 1;
  default:
    llvm_unreachable("Invalid spill opcode");
  }
}
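// --- Illustrative aside (not part of SIRegisterInfo.cpp) ---------------------
// The switch above collapses every spill pseudo of the same width to one
// count: bit width / 32 gives the number of 32-bit subregisters the spill is
// split into. A trivial standalone restatement:
#if 0 // standalone sketch; compile separately
#include <cstdio>

static unsigned numSubRegsForWidth(unsigned Bits) { return Bits / 32; }

int main() {
  const unsigned Widths[] = {32, 96, 384, 1024};
  for (unsigned Bits : Widths)
    std::printf("SI_SPILL_*%u_* -> %u subregs\n", Bits,
                numSubRegsForWidth(Bits));
}
#endif
// ------------------------------------------------------------------------------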
static int getOffsetMUBUFStore(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
  default:
    return -1;
  }
}

static int getOffsetMUBUFLoad(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
  default:
    return -1;
  }
}

static int getOffenMUBUFStore(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
  case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
    return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
  case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
    return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
  default:
    return -1;
  }
}

static int getOffenMUBUFLoad(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
  default:
    return -1;
  }
}
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
                                           MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MI,
                                           int Index, unsigned Lane,
                                           unsigned ValueReg, bool IsKill) {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);

  if (Reg == AMDGPU::NoRegister)
    return MachineInstrBuilder();

  bool IsStore = MI->mayStore();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  auto *TRI = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());

  unsigned Dst = IsStore ? Reg : ValueReg;
  unsigned Src = IsStore ? ValueReg : Reg;
  bool IsVGPR = TRI->isVGPR(MRI, Reg);
  DebugLoc DL = MI->getDebugLoc();
  if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
    // Both source and destination are on the same side; a plain mov suffices.
    auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst)
                       .addReg(Src, getKillRegState(IsKill));
    CopyMIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
    return CopyMIB;
  }

  unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
                                    : AMDGPU::V_ACCVGPR_READ_B32_e64;
  auto MIB = BuildMI(MBB, MI, DL, TII->get(Opc), Dst)
                 .addReg(Src, getKillRegState(IsKill));
  MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
  return MIB;
}

static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
                                      MachineFrameInfo &MFI,
                                      MachineBasicBlock::iterator MI,
                                      int Index, int64_t Offset) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineBasicBlock *MBB = MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = MI->mayStore();

  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
  if (LoadStoreOp == -1)
    return false;

  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
  // ...
  MachineInstrBuilder NewMI =
      BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
          .add(*Reg)
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
          .addImm(Offset)
          // ...
          .cloneMemRefs(*MI);

  const MachineOperand *VDataIn =
      TII->getNamedOperand(*MI, AMDGPU::OpName::vdata_in);
  if (VDataIn)
    NewMI.add(*VDataIn);
  return true;
}

static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
                                          unsigned LoadStoreOp,
                                          unsigned EltSize) {
  bool IsStore = TII->get(LoadStoreOp).mayStore();
  // ...
  switch (EltSize) {
  case 4:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
    break;
  case 8:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
    break;
  case 12:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
    break;
  case 16:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
    break;
  default:
    llvm_unreachable("Unexpected spill load/store size!");
  }
  // ...
  return LoadStoreOp;
}
void SIRegisterInfo::buildSpillLoadStore(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
    unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
    MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO,
    RegScavenger *RS, LiveRegUnits *LiveUnits) const {
  assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");
  // ...
  bool IsStore = Desc->mayStore();
  bool IsFlat = TII->isFLATScratch(LoadStoreOp);

  bool CanClobberSCC = false;
  bool Scavenged = false;
  // ...
  // A flat-scratch (non-AGPR) access can move up to 16 bytes (dwordx4) at a
  // time; everything else is split into 4-byte pieces. See the chunking
  // sketch after this block.
  unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
  unsigned NumSubRegs = RegWidth / EltSize;
  unsigned Size = NumSubRegs * EltSize;
  unsigned RemSize = RegWidth - Size;
  unsigned NumRemSubRegs = RemSize ? 1 : 0;
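// --- Illustrative aside (not part of SIRegisterInfo.cpp) ---------------------
// Standalone sketch of the chunking above: a spilled register of RegWidth
// bytes is split into NumSubRegs equal EltSize chunks plus at most one
// remainder chunk. Widths are invented for illustration.
#if 0 // standalone sketch; compile separately
#include <algorithm>
#include <cstdio>

int main() {
  bool IsFlat = true, IsAGPR = false;
  const unsigned Widths[] = {4, 12, 24, 44}; // bytes
  for (unsigned RegWidth : Widths) {
    unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
    unsigned NumSubRegs = RegWidth / EltSize;
    unsigned RemSize = RegWidth - NumSubRegs * EltSize;
    std::printf("%2u bytes -> %u x %u + remainder %u\n", RegWidth, NumSubRegs,
                EltSize, RemSize);
  }
}
#endif
// ------------------------------------------------------------------------------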
  int64_t MaterializedOffset = Offset;

  int64_t MaxOffset = Offset + Size + RemSize - EltSize;
  int64_t ScratchOffsetRegDelta = 0;

  if (IsFlat && EltSize > 4) {
    LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
    Desc = &TII->get(LoadStoreOp);
  }
  // ...
  assert((IsFlat || ((Offset % EltSize) == 0)) &&
         "unexpected VGPR spill offset");
1397 if (IsFlat && SGPRBase) {
1421 bool IsOffsetLegal =
1424 :
TII->isLegalMUBUFImmOffset(MaxOffset);
1436 CanClobberSCC = !RS->
isRegUsed(AMDGPU::SCC);
1437 }
else if (LiveUnits) {
1438 CanClobberSCC = LiveUnits->
available(AMDGPU::SCC);
1439 for (
MCRegister Reg : AMDGPU::SGPR_32RegClass) {
1447 if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
1451 UseVGPROffset =
true;
1457 for (
MCRegister Reg : AMDGPU::VGPR_32RegClass) {
1459 TmpOffsetVGPR = Reg;
1466 }
else if (!SOffset && CanClobberSCC) {
1477 if (!ScratchOffsetReg)
1479 SOffset = ScratchOffsetReg;
1480 ScratchOffsetRegDelta =
Offset;
1488 if (!IsFlat && !UseVGPROffset)
1491 if (!UseVGPROffset && !SOffset)
1494 if (UseVGPROffset) {
1496 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR,
Offset);
1497 }
else if (ScratchOffsetReg == AMDGPU::NoRegister) {
1502 .
addReg(ScratchOffsetReg)
1504 Add->getOperand(3).setIsDead();
1510 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1512 &&
"Unexpected vaddr for flat scratch with a FI operand");
1514 if (UseVGPROffset) {
1521 Desc = &
TII->get(LoadStoreOp);
  for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
       ++i, RegOffset += EltSize) {
    if (i == NumSubRegs) {
      EltSize = RemSize;
      LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
      Desc = &TII->get(LoadStoreOp);
    }
    // ...
    if (!IsFlat && UseVGPROffset) {
      int NewLoadStoreOp = IsStore ? getOffenMUBUFStore(LoadStoreOp)
                                   : getOffenMUBUFLoad(LoadStoreOp);
      Desc = &TII->get(NewLoadStoreOp);
    }

    if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
      // The offset VGPR doubles as the intermediate VGPR, so the offset
      // needs to be rematerialized for each piece.
      MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
    }

    unsigned NumRegs = EltSize / 4;
    // ...
    unsigned SOffsetRegState = 0;
    unsigned SrcDstRegState = getDefRegState(!IsStore);
    const bool IsLastSubReg = i + 1 == e;
    const bool IsFirstSubReg = i == 0;
    // ...
    bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
    bool NeedSuperRegImpOperand = e > 1;
    // ...
    unsigned RemEltSize = EltSize;
    // ...

    // AGPR pieces are moved one 32-bit lane at a time; see the register-state
    // sketch after this loop.
    for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
             LaneE = RegOffset / 4;
         Lane >= LaneE; --Lane) {
      bool IsSubReg = e > 1 || EltSize > 4;
      // ...
      auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, SubReg, IsKill);
      if (!MIB.getInstr())
        break;
      if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
        MIB.addReg(ValueReg, RegState::ImplicitDefine);
        NeedSuperRegDef = false;
      }
      if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
        NeedSuperRegImpOperand = true;
        unsigned State = SrcDstRegState;
        if (!IsLastSubReg || (Lane != LaneE))
          State &= ~RegState::Kill;
        if (!IsFirstSubReg || (Lane != LaneS))
          State &= ~RegState::Define;
        MIB.addReg(ValueReg, RegState::Implicit | State);
      }
    }
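// --- Illustrative aside (not part of SIRegisterInfo.cpp) ---------------------
// Sketch of the register-state masking above: only the first emitted piece
// carries the super-register Define, and only the last carries Kill, so the
// super register stays live across the intermediate pieces. The flag values
// are invented for this sketch (not LLVM's RegState encoding), and the
// per-lane (LaneS/LaneE) refinement is omitted.
#if 0 // standalone sketch; compile separately
#include <cstdio>

enum : unsigned { Define = 1u << 0, Kill = 1u << 1 };

int main() {
  unsigned SrcDstRegState = Define | Kill;
  unsigned e = 4; // number of pieces
  for (unsigned i = 0; i != e; ++i) {
    bool IsFirstSubReg = i == 0, IsLastSubReg = i + 1 == e;
    unsigned State = SrcDstRegState;
    if (!IsLastSubReg)
      State &= ~Kill;
    if (!IsFirstSubReg)
      State &= ~Define;
    std::printf("piece %u: define=%d kill=%d\n", i, (State & Define) != 0,
                (State & Kill) != 0);
  }
}
#endif
// ------------------------------------------------------------------------------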
    if (RemEltSize != EltSize) { // Legalize the final register value.
      assert(IsFlat && EltSize > 4);
      unsigned NumRegs = RemEltSize / 4;
      // ...
    }

    unsigned FinalReg = SubReg;

    if (IsAGPR) {
      if (!TmpIntermediateVGPR) {
        // ...
      }
      if (IsStore) {
        auto AccRead =
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
                    TmpIntermediateVGPR)
                .addReg(SubReg, getKillRegState(IsKill));
        if (NeedSuperRegDef)
          AccRead.addReg(ValueReg, RegState::ImplicitDefine);
        // ...
      }
      SubReg = TmpIntermediateVGPR;
    } else if (UseVGPROffset) {
      if (!TmpOffsetVGPR) {
        // ...
      }
    }

    // ...
    if (UseVGPROffset) {
      // ...
    }

    if (SOffset == AMDGPU::NoRegister) {
      // ...
      if (UseVGPROffset && ScratchOffsetReg) {
        MIB.addReg(ScratchOffsetReg);
      }
      // ...
    } else {
      MIB.addReg(SOffset, SOffsetRegState);
    }

    MIB.addImm(Offset + RegOffset);
    // ...
    MIB.addMemOperand(NewMMO);

    if (!IsAGPR && NeedSuperRegDef)
      MIB.addReg(ValueReg, RegState::ImplicitDefine);

    if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
      // Copy the intermediate VGPR back into the requested AGPR.
      // ...
    }

    if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
      MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
  }

  if (!IsStore && MI != MBB.end() && MI->isReturn() &&
      MI->readsRegister(ValueReg, this)) {
    MIB->tieOperands(0, MIB->getNumOperands() - 1);
  }

  if (ScratchOffsetRegDelta != 0) {
    // Subtract the offset we added to the ScratchOffset register.
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
        .addReg(SOffset)
        .addImm(-ScratchOffsetRegDelta);
  }
}
void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
                                             int64_t Offset, bool IsLoad,
                                             bool IsKill) const {
  // Load/store the temporary VGPR used for SGPR spills to memory.
  // ...
  if (IsLoad) {
    unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                          : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
    // ...
  } else {
    unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                          : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
    // ...
  }
}
bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
                               RegScavenger *RS, SlotIndexes *Indexes,
                               LiveIntervals *LIS, bool OnlyToVGPR,
                               bool SpillToPhysVGPRLane) const {
  SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);

  ArrayRef<SpilledReg> VGPRSpills =
      SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
                          : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    return false;

  if (SpillToVGPR) {
    assert(SB.NumSubRegs == VGPRSpills.size() &&
           "Num of VGPR lanes should be equal to num of SGPRs spilled");

    for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
      Register SubReg =
          SB.NumSubRegs == 1
              ? SB.SuperReg
              : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
      SpilledReg Spill = VGPRSpills[i];

      bool IsFirstSubreg = i == 0;
      bool IsLastSubreg = i == SB.NumSubRegs - 1;
      bool UseKill = SB.IsKill && IsLastSubreg;

      auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
                         SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)
                     .addReg(SubReg, getKillRegState(UseKill))
                     .addImm(Spill.Lane)
                     .addReg(Spill.VGPR);
      // ...
      // The super register is an implicit operand when spilt piecewise.
      if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))
        MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit);
    }
  } else {
    // Spill through memory: write each batch of PerVGPR lanes into the
    // temporary VGPR, then store it (see the lane-packing sketch after this
    // function).
    // ...
    for (unsigned i = Offset * PVD.PerVGPR,
                  e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
         i < e; ++i) {
      // ...
      BuildMI(*SB.MBB, MI, SB.DL,
              SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)
          .addReg(SubReg)
          .addImm(i % PVD.PerVGPR)
          .addReg(SB.TmpVGPR); // (kill/undef flags elided in this excerpt)
      // ...
      unsigned SuperKillState = 0;
      // ...
    }
    // ...
  }

  MI->eraseFromParent();
  if (LIS)
    LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
  // ...
  return true;
}
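// --- Illustrative aside (not part of SIRegisterInfo.cpp) ---------------------
// Sketch of the lane packing used by SGPR-to-VGPR spills: a VGPR provides one
// 32-bit value per wavefront lane, so a wave64 target packs up to 64 spilled
// SGPRs into a single VGPR, and the loops above batch PerVGPR lanes at a time.
// Numbers are illustrative.
#if 0 // standalone sketch; compile separately
#include <algorithm>
#include <cstdio>

int main() {
  unsigned NumSubRegs = 70; // SGPRs to spill
  unsigned PerVGPR = 64;    // lanes per VGPR on wave64
  unsigned NumVGPRs = (NumSubRegs + PerVGPR - 1) / PerVGPR;
  std::printf("%u SGPRs -> %u VGPR(s)\n", NumSubRegs, NumVGPRs);
  for (unsigned Offset = 0; Offset != NumVGPRs; ++Offset) {
    unsigned Begin = Offset * PerVGPR;
    unsigned End = std::min((Offset + 1) * PerVGPR, NumSubRegs);
    std::printf("  VGPR %u holds lanes [%u, %u)\n", Offset, Begin, End);
  }
}
#endif
// ------------------------------------------------------------------------------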
bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
                                 RegScavenger *RS, SlotIndexes *Indexes,
                                 LiveIntervals *LIS, bool OnlyToVGPR,
                                 bool SpillToPhysVGPRLane) const {
  SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);

  ArrayRef<SpilledReg> VGPRSpills =
      SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
                          : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    return false;

  if (SpillToVGPR) {
    for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
      Register SubReg =
          SB.NumSubRegs == 1
              ? SB.SuperReg
              : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
      SpilledReg Spill = VGPRSpills[i];
      auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
                         SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
                     .addReg(Spill.VGPR)
                     .addImm(Spill.Lane);
      // ...
    }
  } else {
    // Restore through memory: reload the temporary VGPR, then copy each batch
    // of lanes back out.
    // ...
    for (unsigned i = Offset * PVD.PerVGPR,
                  e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
         i < e; ++i) {
      // ...
      bool LastSubReg = (i + 1 == e);
      auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
                         SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
                     .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
                     .addImm(i);
      // ...
    }
    // ...
  }

  MI->eraseFromParent();
  // ...
  return true;
}

bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
                                        MachineBasicBlock &RestoreMBB,
                                        Register SGPR, RegScavenger *RS) const {
  // ...
  for (unsigned i = Offset * PVD.PerVGPR,
                e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
       i < e; ++i) {
    // ...
    unsigned SuperKillState = 0;
    // ...
  }

  // Replay the restore sequence in the designated restore block.
  MI = RestoreMBB.end();
  // ...
  for (unsigned i = Offset * PVD.PerVGPR,
                e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
       i < e; ++i) {
    // ...
    bool LastSubReg = (i + 1 == e);
    // ...
  }
  // ...
  return false;
}
bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
    MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
    SlotIndexes *Indexes, LiveIntervals *LIS, bool SpillToPhysVGPRLane) const {
  switch (MI->getOpcode()) {
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S384_SAVE:
  case AMDGPU::SI_SPILL_S352_SAVE:
  case AMDGPU::SI_SPILL_S320_SAVE:
  case AMDGPU::SI_SPILL_S288_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE:
    return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S384_RESTORE:
  case AMDGPU::SI_SPILL_S352_RESTORE:
  case AMDGPU::SI_SPILL_S320_RESTORE:
  case AMDGPU::SI_SPILL_S288_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
    return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
  default:
    llvm_unreachable("not an SGPR spill instruction");
  }
}
bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF)
                          ? getBaseRegister()
                          : getFrameRegister(*MF);

  switch (MI->getOpcode()) {
  // SGPR register spill
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S384_SAVE:
  case AMDGPU::SI_SPILL_S352_SAVE:
  case AMDGPU::SI_SPILL_S320_SAVE:
  case AMDGPU::SI_SPILL_S288_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE: {
    return spillSGPR(MI, Index, RS);
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S384_RESTORE:
  case AMDGPU::SI_SPILL_S352_RESTORE:
  case AMDGPU::SI_SPILL_S320_RESTORE:
  case AMDGPU::SI_SPILL_S288_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
    return restoreSGPR(MI, Index, RS);
  }
  // VGPR register spill
  case AMDGPU::SI_SPILL_V1024_SAVE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V384_SAVE:
  case AMDGPU::SI_SPILL_V352_SAVE:
  case AMDGPU::SI_SPILL_V320_SAVE:
  case AMDGPU::SI_SPILL_V288_SAVE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V224_SAVE:
  case AMDGPU::SI_SPILL_V192_SAVE:
  case AMDGPU::SI_SPILL_V160_SAVE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_A1024_SAVE:
  case AMDGPU::SI_SPILL_A512_SAVE:
  case AMDGPU::SI_SPILL_A384_SAVE:
  case AMDGPU::SI_SPILL_A352_SAVE:
  case AMDGPU::SI_SPILL_A320_SAVE:
  case AMDGPU::SI_SPILL_A288_SAVE:
  case AMDGPU::SI_SPILL_A256_SAVE:
  case AMDGPU::SI_SPILL_A224_SAVE:
  case AMDGPU::SI_SPILL_A192_SAVE:
  case AMDGPU::SI_SPILL_A160_SAVE:
  case AMDGPU::SI_SPILL_A128_SAVE:
  case AMDGPU::SI_SPILL_A96_SAVE:
  case AMDGPU::SI_SPILL_A64_SAVE:
  case AMDGPU::SI_SPILL_A32_SAVE:
  case AMDGPU::SI_SPILL_AV1024_SAVE:
  case AMDGPU::SI_SPILL_AV512_SAVE:
  case AMDGPU::SI_SPILL_AV384_SAVE:
  case AMDGPU::SI_SPILL_AV352_SAVE:
  case AMDGPU::SI_SPILL_AV320_SAVE:
  case AMDGPU::SI_SPILL_AV288_SAVE:
  case AMDGPU::SI_SPILL_AV256_SAVE:
  case AMDGPU::SI_SPILL_AV224_SAVE:
  case AMDGPU::SI_SPILL_AV192_SAVE:
  case AMDGPU::SI_SPILL_AV160_SAVE:
  case AMDGPU::SI_SPILL_AV128_SAVE:
  case AMDGPU::SI_SPILL_AV96_SAVE:
  case AMDGPU::SI_SPILL_AV64_SAVE:
  case AMDGPU::SI_SPILL_AV32_SAVE:
  case AMDGPU::SI_SPILL_WWM_V32_SAVE:
  case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
    const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata);
    assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
           MFI->getStackPtrOffsetReg());

    unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                          : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
    auto *MBB = MI->getParent();
    bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
    if (IsWWMRegSpill) {
      // Save EXEC and switch to an all-lanes mask around the WWM spill.
      // ...
    }
    buildSpillLoadStore(
        *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
        TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
        *MI->memoperands_begin(), RS);
    MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
    if (IsWWMRegSpill) {
      // ... restore EXEC ...
    }
    MI->eraseFromParent();
    return true;
  }
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_V160_RESTORE:
  case AMDGPU::SI_SPILL_V192_RESTORE:
  case AMDGPU::SI_SPILL_V224_RESTORE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_V288_RESTORE:
  case AMDGPU::SI_SPILL_V320_RESTORE:
  case AMDGPU::SI_SPILL_V352_RESTORE:
  case AMDGPU::SI_SPILL_V384_RESTORE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
  case AMDGPU::SI_SPILL_V1024_RESTORE:
  case AMDGPU::SI_SPILL_A32_RESTORE:
  case AMDGPU::SI_SPILL_A64_RESTORE:
  case AMDGPU::SI_SPILL_A96_RESTORE:
  case AMDGPU::SI_SPILL_A128_RESTORE:
  case AMDGPU::SI_SPILL_A160_RESTORE:
  case AMDGPU::SI_SPILL_A192_RESTORE:
  case AMDGPU::SI_SPILL_A224_RESTORE:
  case AMDGPU::SI_SPILL_A256_RESTORE:
  case AMDGPU::SI_SPILL_A288_RESTORE:
  case AMDGPU::SI_SPILL_A320_RESTORE:
  case AMDGPU::SI_SPILL_A352_RESTORE:
  case AMDGPU::SI_SPILL_A384_RESTORE:
  case AMDGPU::SI_SPILL_A512_RESTORE:
  case AMDGPU::SI_SPILL_A1024_RESTORE:
  case AMDGPU::SI_SPILL_AV32_RESTORE:
  case AMDGPU::SI_SPILL_AV64_RESTORE:
  case AMDGPU::SI_SPILL_AV96_RESTORE:
  case AMDGPU::SI_SPILL_AV128_RESTORE:
  case AMDGPU::SI_SPILL_AV160_RESTORE:
  case AMDGPU::SI_SPILL_AV192_RESTORE:
  case AMDGPU::SI_SPILL_AV224_RESTORE:
  case AMDGPU::SI_SPILL_AV256_RESTORE:
  case AMDGPU::SI_SPILL_AV288_RESTORE:
  case AMDGPU::SI_SPILL_AV320_RESTORE:
  case AMDGPU::SI_SPILL_AV352_RESTORE:
  case AMDGPU::SI_SPILL_AV384_RESTORE:
  case AMDGPU::SI_SPILL_AV512_RESTORE:
  case AMDGPU::SI_SPILL_AV1024_RESTORE:
  case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
  case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
    const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata);
    assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
           MFI->getStackPtrOffsetReg());

    unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                          : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
    auto *MBB = MI->getParent();
    bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
    if (IsWWMRegSpill) {
      // Save EXEC and switch to an all-lanes mask around the WWM reload.
      // ...
    }
    buildSpillLoadStore(
        *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
        TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
        *MI->memoperands_begin(), RS);
    if (IsWWMRegSpill) {
      // ... restore EXEC ...
    }
    MI->eraseFromParent();
    return true;
  }
  default: {
    // Other access to frame index.
    int64_t Offset = FrameInfo.getObjectOffset(Index);

    if (ST.enableFlatScratch()) {
      if (TII->isFLATScratch(*MI)) {
        assert((int16_t)FIOperandNum ==
               AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::saddr));
        // ...
        MachineOperand *OffsetOp =
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
        int64_t NewOffset = Offset + OffsetOp->getImm();
        // ...
        OffsetOp->setImm(NewOffset);
        // ...
      }

      unsigned Opc = MI->getOpcode();
      // ...
      int VDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
      bool TiedVDst = VDstIn != -1 && MI->getOperand(VDstIn).isReg() &&
                      MI->getOperand(VDstIn).isTied();
      if (TiedVDst)
        MI->untieRegOperand(VDstIn);
      // ...
      if (TiedVDst) {
        // ...
        assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
        MI->tieOperands(NewVDst, NewVDstIn);
      }
      MI->setDesc(TII->get(NewOpc));
      // ...
      if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
        return true;
      // ...
      bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);

      if (!Offset && FrameReg && UseSGPR) {
        MI->getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
        return true;
      }

      const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
                                              : &AMDGPU::VGPR_32RegClass;
      // ...
      if ((!FrameReg || !Offset) && TmpReg) {
        unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
        auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
        if (FrameReg)
          MIB.addReg(FrameReg);
        else
          MIB.addImm(Offset);
        // ...
      }

      bool NeedSaveSCC =
          RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
      // ...
      TmpSReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
                                              MI, false, 0, !UseSGPR);

      // TODO: for flat scratch another attempt can be made with a VGPR index
      //       if no SGPRs can be scavenged.
      if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
        report_fatal_error("Cannot scavenge register in FI elimination!");
      // ...
      assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
      // ...
      if (TmpSReg == FrameReg) {
        // Undo the frame register modification.
        if (NeedSaveSCC && !MI->registerDefIsDead(AMDGPU::SCC)) {
          // ...
        }
        // ...
      }

      return true;
    }

    bool IsMUBUF = TII->isMUBUF(*MI);

    if (!IsMUBUF && !MFI->isBottomOfStack()) {
      // Convert to a swizzled stack address by scaling by the wave size. In an
      // entry function the offset is already swizzled.
      bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
      bool LiveSCC =
          RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
      const TargetRegisterClass *RC = IsSALU && !LiveSCC
                                          ? &AMDGPU::SReg_32RegClass
                                          : &AMDGPU::VGPR_32RegClass;
      bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
                    MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
      Register ResultReg =
          IsCopy ? MI->getOperand(0).getReg()
                 : RS->scavengeRegisterBackwards(*RC, MI, false, 0);

      int64_t Offset = FrameInfo.getObjectOffset(Index);
      if (Offset == 0) {
        unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
                                             : AMDGPU::V_LSHRREV_B32_e64;
        auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg);
        if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
          // For V_LSHRREV the shift amount comes first.
          Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
        else
          Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
        if (IsSALU && !LiveSCC)
          Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
        if (IsSALU && LiveSCC) {
          Register NewDest = RS->scavengeRegisterBackwards(
              AMDGPU::SReg_32RegClass, Shift, false, 0);
          // ...
          ResultReg = NewDest;
        }
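// --- Illustrative aside (not part of SIRegisterInfo.cpp) ---------------------
// Sketch of the scaling above: outside the entry function the frame register
// holds a byte offset premultiplied by the wavefront size (a "swizzled"
// address), so the plain frame-index value is recovered by shifting right by
// log2(wavefront size). Values are invented.
#if 0 // standalone sketch; compile separately
#include <cstdio>

int main() {
  unsigned WavefrontSizeLog2 = 6; // wave64
  unsigned FrameRegValue = 4096;  // swizzled byte offset
  std::printf("unswizzled frame value: %u\n",
              FrameRegValue >> WavefrontSizeLog2); // prints 64
}
#endif
// ------------------------------------------------------------------------------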
      } else {
        MachineInstrBuilder MIB;
        if (!IsSALU) {
          if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
              nullptr) {
            // ...
            const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
            // ...
            assert(!(Offset & 1) && "Need to reuse carry out register");
            // ...
            Register ConstOffsetReg;
            if (IsVOP2)
              ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
            else
              ConstOffsetReg = MIB.getReg(1);
            // ...
          }
        }
        if (!MIB || IsSALU) {
          // We have to produce a carry out, and there isn't a free SGPR pair
          // for it. We can keep the whole computation on the SALU to avoid
          // clobbering an additional register at the cost of an extra mov.
          // ...
          Register TmpScaledReg = RS->scavengeRegisterBackwards(
              AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false);
          Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
          // ...
          ResultReg = ScaledReg;

          // If there were truly no free SGPRs, we need to undo everything.
          if (!TmpScaledReg.isValid()) {
            // ...
          }
        }
      }

      MI->eraseFromParent();
      return true;
    }
    if (IsMUBUF) {
      // Disable offen so we don't need a 0 vgpr base.
      assert(static_cast<int>(FIOperandNum) ==
             AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                        AMDGPU::OpName::vaddr));

      auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
      assert((SOffset.isImm() && SOffset.getImm() == 0));

      if (FrameReg != AMDGPU::NoRegister)
        SOffset.ChangeToRegister(FrameReg, false);

      int64_t Offset = FrameInfo.getObjectOffset(Index);
      int64_t OldImm =
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
      int64_t NewOffset = OldImm + Offset;

      if (TII->isLegalMUBUFImmOffset(NewOffset) &&
          buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
        MI->eraseFromParent();
        return true;
      }
    }

    // If the offset is simply too big, don't convert to a scratch wave offset
    // relative index.
    // ...
    if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
      // ... scavenge a register and rewrite the FI operand ...
    }
  }
  return false;
}
static const TargetRegisterClass *
getAnyVGPRClassForBitWidth(unsigned BitWidth) {
  if (BitWidth <= 64)
    return &AMDGPU::VReg_64RegClass;
  if (BitWidth <= 96)
    return &AMDGPU::VReg_96RegClass;
  if (BitWidth <= 128)
    return &AMDGPU::VReg_128RegClass;
  if (BitWidth <= 160)
    return &AMDGPU::VReg_160RegClass;
  if (BitWidth <= 192)
    return &AMDGPU::VReg_192RegClass;
  if (BitWidth <= 224)
    return &AMDGPU::VReg_224RegClass;
  if (BitWidth <= 256)
    return &AMDGPU::VReg_256RegClass;
  if (BitWidth <= 288)
    return &AMDGPU::VReg_288RegClass;
  if (BitWidth <= 320)
    return &AMDGPU::VReg_320RegClass;
  if (BitWidth <= 352)
    return &AMDGPU::VReg_352RegClass;
  if (BitWidth <= 384)
    return &AMDGPU::VReg_384RegClass;
  if (BitWidth <= 512)
    return &AMDGPU::VReg_512RegClass;
  if (BitWidth <= 1024)
    return &AMDGPU::VReg_1024RegClass;

  return nullptr;
}

static const TargetRegisterClass *
getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
  if (BitWidth <= 64)
    return &AMDGPU::VReg_64_Align2RegClass;
  if (BitWidth <= 96)
    return &AMDGPU::VReg_96_Align2RegClass;
  if (BitWidth <= 128)
    return &AMDGPU::VReg_128_Align2RegClass;
  if (BitWidth <= 160)
    return &AMDGPU::VReg_160_Align2RegClass;
  if (BitWidth <= 192)
    return &AMDGPU::VReg_192_Align2RegClass;
  if (BitWidth <= 224)
    return &AMDGPU::VReg_224_Align2RegClass;
  if (BitWidth <= 256)
    return &AMDGPU::VReg_256_Align2RegClass;
  if (BitWidth <= 288)
    return &AMDGPU::VReg_288_Align2RegClass;
  if (BitWidth <= 320)
    return &AMDGPU::VReg_320_Align2RegClass;
  if (BitWidth <= 352)
    return &AMDGPU::VReg_352_Align2RegClass;
  if (BitWidth <= 384)
    return &AMDGPU::VReg_384_Align2RegClass;
  if (BitWidth <= 512)
    return &AMDGPU::VReg_512_Align2RegClass;
  if (BitWidth <= 1024)
    return &AMDGPU::VReg_1024_Align2RegClass;

  return nullptr;
}

const TargetRegisterClass *
SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
  if (BitWidth == 1)
    return &AMDGPU::VReg_1RegClass;
  if (BitWidth == 16 && ST.useRealTrue16Insts())
    return &AMDGPU::VGPR_16RegClass;
  if (BitWidth <= 32)
    return &AMDGPU::VGPR_32RegClass;
  return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth)
                                : getAnyVGPRClassForBitWidth(BitWidth);
}

static const TargetRegisterClass *
getAnyAGPRClassForBitWidth(unsigned BitWidth) {
  if (BitWidth <= 64)
    return &AMDGPU::AReg_64RegClass;
  if (BitWidth <= 96)
    return &AMDGPU::AReg_96RegClass;
  if (BitWidth <= 128)
    return &AMDGPU::AReg_128RegClass;
  if (BitWidth <= 160)
    return &AMDGPU::AReg_160RegClass;
  if (BitWidth <= 192)
    return &AMDGPU::AReg_192RegClass;
  if (BitWidth <= 224)
    return &AMDGPU::AReg_224RegClass;
  if (BitWidth <= 256)
    return &AMDGPU::AReg_256RegClass;
  if (BitWidth <= 288)
    return &AMDGPU::AReg_288RegClass;
  if (BitWidth <= 320)
    return &AMDGPU::AReg_320RegClass;
  if (BitWidth <= 352)
    return &AMDGPU::AReg_352RegClass;
  if (BitWidth <= 384)
    return &AMDGPU::AReg_384RegClass;
  if (BitWidth <= 512)
    return &AMDGPU::AReg_512RegClass;
  if (BitWidth <= 1024)
    return &AMDGPU::AReg_1024RegClass;

  return nullptr;
}

static const TargetRegisterClass *
getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
  if (BitWidth <= 64)
    return &AMDGPU::AReg_64_Align2RegClass;
  if (BitWidth <= 96)
    return &AMDGPU::AReg_96_Align2RegClass;
  if (BitWidth <= 128)
    return &AMDGPU::AReg_128_Align2RegClass;
  if (BitWidth <= 160)
    return &AMDGPU::AReg_160_Align2RegClass;
  if (BitWidth <= 192)
    return &AMDGPU::AReg_192_Align2RegClass;
  if (BitWidth <= 224)
    return &AMDGPU::AReg_224_Align2RegClass;
  if (BitWidth <= 256)
    return &AMDGPU::AReg_256_Align2RegClass;
  if (BitWidth <= 288)
    return &AMDGPU::AReg_288_Align2RegClass;
  if (BitWidth <= 320)
    return &AMDGPU::AReg_320_Align2RegClass;
  if (BitWidth <= 352)
    return &AMDGPU::AReg_352_Align2RegClass;
  if (BitWidth <= 384)
    return &AMDGPU::AReg_384_Align2RegClass;
  if (BitWidth <= 512)
    return &AMDGPU::AReg_512_Align2RegClass;
  if (BitWidth <= 1024)
    return &AMDGPU::AReg_1024_Align2RegClass;

  return nullptr;
}

const TargetRegisterClass *
SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
  if (BitWidth == 16)
    return &AMDGPU::AGPR_LO16RegClass;
  if (BitWidth <= 32)
    return &AMDGPU::AGPR_32RegClass;
  return ST.needsAlignedVGPRs() ? getAlignedAGPRClassForBitWidth(BitWidth)
                                : getAnyAGPRClassForBitWidth(BitWidth);
}

static const TargetRegisterClass *
getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
  if (BitWidth <= 64)
    return &AMDGPU::AV_64RegClass;
  if (BitWidth <= 96)
    return &AMDGPU::AV_96RegClass;
  if (BitWidth <= 128)
    return &AMDGPU::AV_128RegClass;
  if (BitWidth <= 160)
    return &AMDGPU::AV_160RegClass;
  if (BitWidth <= 192)
    return &AMDGPU::AV_192RegClass;
  if (BitWidth <= 224)
    return &AMDGPU::AV_224RegClass;
  if (BitWidth <= 256)
    return &AMDGPU::AV_256RegClass;
  if (BitWidth <= 288)
    return &AMDGPU::AV_288RegClass;
  if (BitWidth <= 320)
    return &AMDGPU::AV_320RegClass;
  if (BitWidth <= 352)
    return &AMDGPU::AV_352RegClass;
  if (BitWidth <= 384)
    return &AMDGPU::AV_384RegClass;
  if (BitWidth <= 512)
    return &AMDGPU::AV_512RegClass;
  if (BitWidth <= 1024)
    return &AMDGPU::AV_1024RegClass;

  return nullptr;
}

static const TargetRegisterClass *
getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
  if (BitWidth <= 64)
    return &AMDGPU::AV_64_Align2RegClass;
  if (BitWidth <= 96)
    return &AMDGPU::AV_96_Align2RegClass;
  if (BitWidth <= 128)
    return &AMDGPU::AV_128_Align2RegClass;
  if (BitWidth <= 160)
    return &AMDGPU::AV_160_Align2RegClass;
  if (BitWidth <= 192)
    return &AMDGPU::AV_192_Align2RegClass;
  if (BitWidth <= 224)
    return &AMDGPU::AV_224_Align2RegClass;
  if (BitWidth <= 256)
    return &AMDGPU::AV_256_Align2RegClass;
  if (BitWidth <= 288)
    return &AMDGPU::AV_288_Align2RegClass;
  if (BitWidth <= 320)
    return &AMDGPU::AV_320_Align2RegClass;
  if (BitWidth <= 352)
    return &AMDGPU::AV_352_Align2RegClass;
  if (BitWidth <= 384)
    return &AMDGPU::AV_384_Align2RegClass;
  if (BitWidth <= 512)
    return &AMDGPU::AV_512_Align2RegClass;
  if (BitWidth <= 1024)
    return &AMDGPU::AV_1024_Align2RegClass;

  return nullptr;
}

const TargetRegisterClass *
SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
  if (BitWidth <= 32)
    return &AMDGPU::AV_32RegClass;
  return ST.needsAlignedVGPRs()
             ? getAlignedVectorSuperClassForBitWidth(BitWidth)
             : getAnyVectorSuperClassForBitWidth(BitWidth);
}

const TargetRegisterClass *
SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
  if (BitWidth == 16)
    return &AMDGPU::SGPR_LO16RegClass;
  if (BitWidth <= 32)
    return &AMDGPU::SReg_32RegClass;
  if (BitWidth <= 64)
    return &AMDGPU::SReg_64RegClass;
  if (BitWidth <= 96)
    return &AMDGPU::SGPR_96RegClass;
  if (BitWidth <= 128)
    return &AMDGPU::SGPR_128RegClass;
  if (BitWidth <= 160)
    return &AMDGPU::SGPR_160RegClass;
  if (BitWidth <= 192)
    return &AMDGPU::SGPR_192RegClass;
  if (BitWidth <= 224)
    return &AMDGPU::SGPR_224RegClass;
  if (BitWidth <= 256)
    return &AMDGPU::SGPR_256RegClass;
  if (BitWidth <= 288)
    return &AMDGPU::SGPR_288RegClass;
  if (BitWidth <= 320)
    return &AMDGPU::SGPR_320RegClass;
  if (BitWidth <= 352)
    return &AMDGPU::SGPR_352RegClass;
  if (BitWidth <= 384)
    return &AMDGPU::SGPR_384RegClass;
  if (BitWidth <= 512)
    return &AMDGPU::SGPR_512RegClass;
  if (BitWidth <= 1024)
    return &AMDGPU::SGPR_1024RegClass;

  return nullptr;
}
const TargetRegisterClass *
SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
                                  Register Reg) const {
  const TargetRegisterClass *RC;
  if (Reg.isVirtual())
    RC = MRI.getRegClass(Reg);
  else
    RC = getPhysRegBaseClass(Reg);
  return RC;
}

const TargetRegisterClass *
SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
  unsigned Size = getRegSizeInBits(*SRC);
  const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
  assert(VRC && "Invalid register class size");
  return VRC;
}

const TargetRegisterClass *
SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
  unsigned Size = getRegSizeInBits(*SRC);
  const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
  assert(ARC && "Invalid register class size");
  return ARC;
}

const TargetRegisterClass *
SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
  unsigned Size = getRegSizeInBits(*VRC);
  if (Size == 32)
    return &AMDGPU::SGPR_32RegClass;
  const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size);
  assert(SRC && "Invalid register class size");
  return SRC;
}

const TargetRegisterClass *
SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
                                         const TargetRegisterClass *SubRC,
                                         unsigned SubIdx) const {
  // Ensure this subregister index is aligned in the super register.
  const TargetRegisterClass *MatchRC =
      getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
  return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
}

bool SIRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
                                          unsigned DefSubReg,
                                          const TargetRegisterClass *SrcRC,
                                          unsigned SrcSubReg) const {
  // We want to prefer the smallest register class possible, so we don't want
  // to stop and rewrite on anything that looks like a subregister extract.
  // ...
  return getCommonSubClass(DefRC, SrcRC) != nullptr;
}

MCRegister SIRegisterInfo::findUnusedRegister(
    const MachineRegisterInfo &MRI, const TargetRegisterClass *RC,
    const MachineFunction &MF, bool ReserveHighestRegister) const {
  if (ReserveHighestRegister) {
    for (MCRegister Reg : reverse(*RC))
      if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
        return Reg;
  } else {
    for (MCRegister Reg : *RC)
      if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
        return Reg;
  }
  return MCRegister();
}
ArrayRef<int16_t>
SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
                                 unsigned EltSize) const {
  const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC);
  assert(RegBitWidth >= 32 && RegBitWidth <= 1024);

  const unsigned RegDWORDs = RegBitWidth / 32;
  const unsigned EltDWORDs = EltSize / 4;
  assert(RegSplitParts.size() + 1 >= EltDWORDs);

  const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
  const unsigned NumParts = RegDWORDs / EltDWORDs;

  return ArrayRef(Parts.data(), NumParts);
}
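// --- Illustrative aside (not part of SIRegisterInfo.cpp) ---------------------
// Standalone restatement of the arithmetic above: a register of RegBitWidth
// bits splits into RegDWORDs / EltDWORDs parts, and row EltDWORDs - 1 of
// RegSplitParts lists the subregister index for each part position.
#if 0 // standalone sketch; compile separately
#include <cstdio>

int main() {
  unsigned RegBitWidth = 256;            // bits
  unsigned EltSize = 8;                  // bytes per part
  unsigned RegDWORDs = RegBitWidth / 32; // 8
  unsigned EltDWORDs = EltSize / 4;      // 2
  std::printf("%u parts, taken from RegSplitParts row %u\n",
              RegDWORDs / EltDWORDs, EltDWORDs - 1);
}
#endif
// ------------------------------------------------------------------------------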
const TargetRegisterClass *
SIRegisterInfo::getRegClassForOperandReg(const MachineRegisterInfo &MRI,
                                         const MachineOperand &MO) const {
  const Register Reg = MO.getReg();
  const TargetRegisterClass *SrcRC =
      Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
  return getSubRegisterClass(SrcRC, MO.getSubReg());
}

bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
                                    const TargetRegisterClass *SrcRC,
                                    unsigned SubReg,
                                    const TargetRegisterClass *DstRC,
                                    unsigned DstSubReg,
                                    const TargetRegisterClass *NewRC,
                                    LiveIntervals &LIS) const {
  unsigned SrcSize = getRegSizeInBits(*SrcRC);
  unsigned DstSize = getRegSizeInBits(*DstRC);
  unsigned NewSize = getRegSizeInBits(*NewRC);

  // Do not increase size of registers beyond dword, we would need to allocate
  // adjacent registers and constraint regalloc more than needed.

  // Always allow dword coalescing.
  if (SrcSize <= 32 || DstSize <= 32)
    return true;

  return NewSize <= DstSize || NewSize <= SrcSize;
}

unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                             MachineFunction &MF) const {
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
                                                       MF.getFunction());
  switch (RC->getID()) {
  default:
    return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
  case AMDGPU::VGPR_32RegClassID:
    return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
    return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
  }
}

unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
                                                unsigned Idx) const {
  if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
      Idx == AMDGPU::RegisterPressureSets::AGPR_32)
    return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
                               const_cast<MachineFunction &>(MF));

  if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
    return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
                               const_cast<MachineFunction &>(MF));

  return AMDGPUGenRegisterInfo::getRegPressureSetLimit(MF, Idx);
}

const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
  static const int Empty[] = { -1 };

  if (RegPressureIgnoredUnits[RegUnit])
    return Empty;

  return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
}

MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
  // Not a callee saved register.
  return AMDGPU::SGPR30_SGPR31;
}
const TargetRegisterClass *
SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
                                         const RegisterBank &RB) const {
  switch (RB.getID()) {
  case AMDGPU::VGPRRegBankID:
    return getVGPRClassForBitWidth(std::max(32u, Size));
  case AMDGPU::VCCRegBankID:
    assert(Size == 1);
    return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
                    : &AMDGPU::SReg_64_XEXECRegClass;
  case AMDGPU::SGPRRegBankID:
    return getSGPRClassForBitWidth(std::max(32u, Size));
  case AMDGPU::AGPRRegBankID:
    return getAGPRClassForBitWidth(std::max(32u, Size));
  default:
    llvm_unreachable("unknown register bank");
  }
}

const TargetRegisterClass *
SIRegisterInfo::getConstrainedRegClassForOperand(
    const MachineOperand &MO, const MachineRegisterInfo &MRI) const {
  // ...
  return getAllocatableClass(RC);
}

MCRegister SIRegisterInfo::getVCC() const {
  return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
}

MCRegister SIRegisterInfo::getExec() const {
  return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
}

const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
  // VGPR tuples may be clobbered during spilling, so they must be aligned on
  // subtargets that require even alignment.
  return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
                                : &AMDGPU::VReg_64RegClass;
}

const TargetRegisterClass *SIRegisterInfo::getRegClass(unsigned RCID) const {
  switch ((int)RCID) {
  case AMDGPU::SReg_1RegClassID:
    return getBoolRC();
  case AMDGPU::SReg_1_XEXECRegClassID:
    return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
                    : &AMDGPU::SReg_64_XEXECRegClass;
  case -1:
    return nullptr;
  default:
    return AMDGPUGenRegisterInfo::getRegClass(RCID);
  }
}
// Find reaching register definition.
MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
                                              MachineInstr &Use,
                                              MachineRegisterInfo &MRI,
                                              LiveIntervals *LIS) const {
  // ... (MDT: the machine dominator tree; initialization elided) ...
  SlotIndex UseIdx = LIS->getInstructionIndex(Use);
  SlotIndex DefIdx;

  if (Reg.isVirtual()) {
    if (!LIS->hasInterval(Reg))
      return nullptr;
    LiveInterval &LI = LIS->getInterval(Reg);
    LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
                                  : MRI.getMaxLaneMaskForVReg(Reg);
    VNInfo *V = nullptr;
    if (LI.hasSubRanges()) {
      for (auto &S : LI.subranges()) {
        if ((S.LaneMask & SubLanes) == SubLanes) {
          V = S.getVNInfoAt(UseIdx);
          break;
        }
      }
    } else {
      V = LI.getVNInfoAt(UseIdx);
    }
    if (!V)
      return nullptr;
    DefIdx = V->def;
  } else {
    // Find last def.
    for (MCRegUnit Unit : regunits(Reg.asMCReg())) {
      // ...
    }
  }

  MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);

  if (!Def || !MDT.dominates(Def, &Use))
    return nullptr;

  assert(Def->modifiesRegister(Reg, this));

  return Def;
}

MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
  assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);

  for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
                                         AMDGPU::SReg_32RegClass,
                                         AMDGPU::AGPR_32RegClass } ) {
    if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
      return Super;
  }
  if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
                                            &AMDGPU::VGPR_32RegClass)) {
    return Super;
  }

  return AMDGPU::NoRegister;
}

// ...
  unsigned Size = getRegSizeInBits(*RC);
// ...
  return std::min(128u, getSubRegIdxSize(SubReg));
// ...
  return std::min(32u, getSubRegIdxSize(SubReg));
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static int getOffenMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyAGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFLoad(unsigned Opc)
static const std::array< unsigned, 17 > SubRegFromChannelTableWidthMap
static const TargetRegisterClass * getAlignedAGPRClassForBitWidth(unsigned BitWidth)
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize)
static const TargetRegisterClass * getAlignedVGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyVGPRClassForBitWidth(unsigned BitWidth)
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling SGPRs to VGPRs"), cl::ReallyHidden, cl::init(true))
static unsigned getNumSubRegsForSpillOp(unsigned Op)
static const TargetRegisterClass * getAlignedVectorSuperClassForBitWidth(unsigned BitWidth)
static const TargetRegisterClass * getAnyVectorSuperClassForBitWidth(unsigned BitWidth)
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, bool IsKill)
static int getOffenMUBUFLoad(unsigned Opc)
Interface definition for SIRegisterInfo.
static const char * getRegisterName(MCRegister Reg)
uint32_t getLDSSize() const
bool isBottomOfStack() const
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
unsigned getWavefrontSizeLog2() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
This class represents an Operation in the Expression.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
const SIInstrInfo * getInstrInfo() const override
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool enableFlatScratch() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
const SIFrameLowering * getFrameLowering() const override
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasFlatScratchSTMode() const
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasSubRanges() const
Returns true if subregister liveness information is available.
iterator_range< subrange_iterator > subranges()
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
bool hasInterval(Register Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
This class represents the liveness of a register, stack slot, etc.
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
A set of register units used to track register liveness.
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Describe properties that are true of each instruction in the target description file.
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasCalls() const
Return true if the current function has any function calls.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
unsigned getNumFixedObjects() const
Return the number of fixed objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
void setAsmPrinterFlag(uint8_t Flag)
Set a flag for the AsmPrinter.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
void setImm(int64_t immVal)
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void assignRegToScavengingIndex(int FI, Register Reg, MachineInstr *Restore=nullptr)
Record that Reg is in use at scavenging index FI.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
Holds all the information related to register banks.
virtual bool isDivergentRegBank(const RegisterBank *RB) const
Returns true if the register bank is considered divergent.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
static bool isFLATScratch(const MachineInstr &MI)
static bool isMUBUF(const MachineInstr &MI)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool usesAGPRs(const MachineFunction &MF) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
Register getLongBranchReservedReg() const
Register getStackPtrOffsetReg() const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const
Register getSGPRForEXECCopy() const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
Register getVGPRForAGPRCopy() const
Register getFrameOffsetReg() const
void addToSpilledVGPRs(unsigned num)
const ReservedRegSet & getWWMReservedRegs() const
void addToSpilledSGPRs(unsigned num)
Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, int64_t Offset) const override
int64_t getScratchInstrOffset(const MachineInstr *MI) const
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const override
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
ArrayRef< MCPhysReg > getAllSGPR64(const MachineFunction &MF) const
Return all SGPR64 which satisfy the waves per execution unit requirement of the subtarget.
MCRegister findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF, bool ReserveHighestVGPR=false) const
Returns a lowest register that is not used at any point in the function.
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool shouldRealignStack(const MachineFunction &MF) const override
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Register getFrameRegister(const MachineFunction &MF) const override
LLVM_READONLY const TargetRegisterClass * getVectorSuperClassForBitWidth(unsigned BitWidth) const
bool spillEmergencySGPR(MachineBasicBlock::iterator MI, MachineBasicBlock &RestoreMBB, Register SGPR, RegScavenger *RS) const
SIRegisterInfo(const GCNSubtarget &ST)
const uint32_t * getAllVGPRRegMask() const
MCRegister getReturnAddressReg(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
bool hasBasePointer(const MachineFunction &MF) const
const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override
Returns a legal register class to copy a register in the specified class to or from.
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
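For illustration, a hedged sketch of splitting a 128-bit register class into 32-bit (4-byte) elements; TRI is assumed to be a const SIRegisterInfo *:

```cpp
ArrayRef<int16_t> Parts =
    TRI->getRegSplitParts(&AMDGPU::VReg_128RegClass, /*EltSize=*/4);
// Expect four dword indices: AMDGPU::sub0, sub1, sub2, sub3.
```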
ArrayRef< MCPhysReg > getAllSGPR32(const MachineFunction &MF) const
Return all SGPR32 registers which satisfy the waves per execution unit requirement of the subtarget.
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool SpillToPhysVGPRLane=false) const
Special case of eliminateFrameIndex.
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
void buildSpillLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO, RegScavenger *RS, LiveRegUnits *LiveUnits=nullptr) const
bool isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const override
LLVM_READONLY const TargetRegisterClass * getAGPRClassForBitWidth(unsigned BitWidth) const
static bool isChainScratchRegister(Register VGPR)
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register Reg) const override
const uint32_t * getNoPreservedMask() const override
StringRef getRegAsmName(MCRegister Reg) const override
const uint32_t * getAllAllocatableSRegMask() const
MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF, const unsigned Align, const TargetRegisterClass *RC) const
Return the largest available SGPR aligned to Align for the register class RC.
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
const uint32_t * getAllVectorRegMask() const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const
bool opCanUseLiteralConstant(unsigned OpType) const
Register getBaseRegister() const
LLVM_READONLY const TargetRegisterClass * getVGPRClassForBitWidth(unsigned BitWidth) const
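A small sketch of size-driven class selection (TRI assumed in scope; on subtargets that require aligned VGPR tuples the Align2 variants are returned instead):

```cpp
const TargetRegisterClass *RC64 = TRI->getVGPRClassForBitWidth(64); // VReg_64
const TargetRegisterClass *RC96 = TRI->getVGPRClassForBitWidth(96); // VReg_96
```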
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
MachineInstr * findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, MachineRegisterInfo &MRI, LiveIntervals *LIS) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
ArrayRef< MCPhysReg > getAllSGPR128(const MachineFunction &MF) const
Return all SGPR128 registers which satisfy the waves per execution unit requirement of the subtarget.
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
const TargetRegisterClass * getRegClassForOperandReg(const MachineRegisterInfo &MRI, const MachineOperand &MO) const
const uint32_t * getAllAGPRRegMask() const
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
const TargetRegisterClass * getBoolRC() const
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
If OnlyToVGPR is true, this will only succeed if this manages to find a free VGPR lane to spill.
MCRegister getExec() const
MCRegister getVCC() const
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
bool isVectorSuperClass(const TargetRegisterClass *RC) const
const TargetRegisterClass * getWaveMaskRegClass() const
unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC, unsigned SubReg) const
void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
const TargetRegisterClass * getVGPR64Class() const
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill=true) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
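These predicates are static, so they can be queried without a SIRegisterInfo instance; a minimal sketch:

```cpp
bool V = SIRegisterInfo::isVGPRClass(&AMDGPU::VGPR_32RegClass); // true
bool S = SIRegisterInfo::isSGPRClass(&AMDGPU::SGPR_64RegClass); // true
bool A = SIRegisterInfo::isAGPRClass(&AMDGPU::VGPR_32RegClass); // false
```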
const int * getRegUnitPressureSets(unsigned RegUnit) const override
SlotIndex - An opaque wrapper around machine indexes.
bool isValid() const
Returns true if this is a valid index.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
SlotIndex replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
ReplaceMachineInstrInMaps - Replace a machine instruction with a new one in the maps used by the register allocator.
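A hedged sketch of keeping the allocator's slot-index maps in sync when rewriting an instruction; Indexes (a SlotIndexes *), OldMI, and NewMI are assumed to be in scope:

```cpp
if (Indexes) {
  SlotIndex Idx = Indexes->replaceMachineInstrInMaps(OldMI, NewMI);
  assert(Idx.isValid() && "replacement instruction must receive an index");
}
```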
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
const uint8_t TSFlags
Configurable target specific flags.
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
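A minimal sketch of the two containment queries on AMDGPU classes (VGPR_32 is a sub-class of the combined AV_32 class):

```cpp
const TargetRegisterClass *VGPR32 = &AMDGPU::VGPR_32RegClass;
bool Sub   = AMDGPU::AV_32RegClass.hasSubClassEq(VGPR32);       // true
bool Super = VGPR32->hasSuperClassEq(&AMDGPU::AV_32RegClass);   // true
```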
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same spill size.
virtual bool shouldRealignStack(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal calling convention calls for.
A Use represents the edge between a Value definition and its users.
VNInfo - Value Number Information.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSVfromSVS(uint16_t Opcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
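A hedged sketch of which 32-bit values encode as free inline constants: small integers in [-16, 64], a handful of float constants (0.5, 1.0, 2.0, 4.0 and their negations), and 1/(2*pi) when the subtarget has the Inv2Pi feature. Float values are passed as their bit patterns:

```cpp
bool A = AMDGPU::isInlinableLiteral32(64, /*HasInv2Pi=*/true);         // true
bool B = AMDGPU::isInlinableLiteral32(65, /*HasInv2Pi=*/true);         // false
bool C = AMDGPU::isInlinableLiteral32(0x3F000000, /*HasInv2Pi=*/true); // true: 0.5f
```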
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
@ OPERAND_REG_INLINE_AC_FIRST
@ OPERAND_REG_INLINE_AC_LAST
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
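For example, assuming the overload shown above lives in the AMDGPU namespace as elsewhere in this listing:

```cpp
unsigned W = AMDGPU::getRegBitWidth(AMDGPU::VReg_96RegClass); // 96
```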
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is not commonly executed.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
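Despite the dense SFINAE in the signature, usage is simple; the constraint only admits random-access ranges, so the call is constant time:

```cpp
#include "llvm/ADT/STLExtras.h"
#include <vector>

std::vector<int> V = {1, 2, 3};
auto N = llvm::size(V); // 3, computed from end() - begin()
```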
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
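A hedged sketch of the builder idiom, combined with the register-state helpers listed below; MBB, MI, DL, TII, Dst, and Src are assumed to be in scope inside a pass:

```cpp
// Emit "s_mov_b32 Dst, Src" before MI, marking Src as killed.
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), Dst)
    .addReg(Src, getKillRegState(true));
```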
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
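For instance, the number of 32-bit lanes needed to hold a 96-bit value:

```cpp
#include "llvm/Support/MathExtras.h"

uint64_t Lanes = llvm::divideCeil(96, 32); // 3
```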
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
auto reverse(ContainerTy &&C)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
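A minimal sketch of the one-time-initialization idiom:

```cpp
#include "llvm/Support/Threading.h"

static llvm::once_flag Flag;

void ensureInit() {
  llvm::call_once(Flag, [] {
    // Runs exactly once, even if ensureInit() races on several threads.
  });
}
```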
constexpr unsigned BitWidth
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t that is less than or equal to Value and congruent to Skew modulo Align.
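A short sketch covering this helper and commonAlignment above:

```cpp
#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"

llvm::Align A = llvm::commonAlignment(llvm::Align(16), /*Offset=*/8); // Align(8)
uint64_t V = llvm::alignDown(37, 16);                                 // 32
```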
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that remain distinct in the MachineFunction.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
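A hedged sketch of describing a spill-slot access; MF (a MachineFunction &) and the frame index FI are assumed to be in scope:

```cpp
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
    PtrInfo, MachineMemOperand::MOStore, /*Size=*/4, llvm::Align(4));
// The next dword of the same slot:
MachinePointerInfo HiDword = PtrInfo.getWithOffset(4);
```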
void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI)
ArrayRef< int16_t > SplitParts
SIMachineFunctionInfo & MFI
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, Register Reg, bool IsKill, int Index, RegScavenger *RS)
PerVGPRData getPerVGPRData()
MachineBasicBlock::iterator MI
void readWriteTmpVGPR(unsigned Offset, bool IsLoad)
const SIRegisterInfo & TRI
The llvm::once_flag structure.