#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"
    "amdgpu-spill-sgpr-to-vgpr",
    cl::desc("Enable spilling SGPRs to VGPRs"),
std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
    0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
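// The table above maps a sub-register width in 32-bit dwords to a 1-based row
// number in SubRegFromChannelTable: widths of 1-8 dwords use rows 1-8, widths
// of 9-15 dwords have no table entry (0), and a width of 16 dwords uses row 9.
// getSubRegFromChannel() below subtracts 1 from this value to index the
// 9-row table, e.g. a 2-dword sub-register starting at channel 2 resolves to
// AMDGPU::sub2_sub3 via SubRegFromChannelTable[1][2].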
                         MI->getOperand(0).isKill(), Index, RS) {}
      MovOpc = AMDGPU::S_MOV_B32;
      NotOpc = AMDGPU::S_NOT_B32;
      MovOpc = AMDGPU::S_MOV_B64;
      NotOpc = AMDGPU::S_NOT_B64;
           SuperReg != AMDGPU::EXEC && "exec should never spill");
    assert(RS && "Cannot spill SGPR to memory without RegScavenger");
        IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
      MI->emitError("unhandled SGPR spill to memory");
    I->getOperand(2).setIsDead();
    I->getOperand(2).setIsDead();
      MI->emitError("unhandled SGPR spill to memory");
  assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
         getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
         (getSubRegIndexLaneMask(AMDGPU::lo16) |
          getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
             getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
         "getNumCoveredRegs() will not work with generated subreg masks!");
  RegPressureIgnoredUnits.resize(getNumRegUnits());
  RegPressureIgnoredUnits.set(
  for (auto Reg : AMDGPU::VGPR_HI16RegClass)
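// The lambda below lazily populates RegSplitParts: for each sub-register
// index, the row selected by its size in dwords (Size/32 - 1) records the
// index at the position derived from its offset. Rows are sized for the
// widest (1024-bit) register, i.e. 1024/Size entries, and getRegSplitParts()
// later uses this cache to slice a wide register into equally sized pieces.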
  static auto InitializeRegSplitPartsOnce = [this]() {
    for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
      unsigned Size = getSubRegIdxSize(Idx);
      std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
      unsigned Pos = getSubRegIdxOffset(Idx);
        unsigned MaxNumParts = 1024 / Size;
        Vec.resize(MaxNumParts);
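// The next lambda fills SubRegFromChannelTable. Every row is first cleared to
// AMDGPU::NoSubRegister; then, for each sub-register index, its width and
// offset in dwords (taken from AMDGPUSubRegIdxRanges) select the row and
// column where the index is stored, so getSubRegFromChannel() can answer
// "which sub-register covers N dwords starting at channel C" with one lookup.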
  static auto InitializeSubRegFromChannelTableOnce = [this]() {
    for (auto &Row : SubRegFromChannelTable)
      Row.fill(AMDGPU::NoSubRegister);
    for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
      unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
      unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
      unsigned TableIdx = Width - 1;
      assert(TableIdx < SubRegFromChannelTable.size());
      SubRegFromChannelTable[TableIdx][Offset] = Idx;
  llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
  llvm::call_once(InitializeSubRegFromChannelTableFlag,
                  InitializeSubRegFromChannelTableOnce);
void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
  for (; R.isValid(); ++R)
               : CSR_AMDGPU_SaveList;
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
                               : CSR_AMDGPU_SI_Gfx_SaveList;
  static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
  return &NoCalleeSavedReg;
               : CSR_AMDGPU_RegMask;
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
                               : CSR_AMDGPU_SI_Gfx_RegMask;
  return CSR_AMDGPU_NoRegs_RegMask;
  if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
    return &AMDGPU::AV_32RegClass;
  if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
    return &AMDGPU::AV_64RegClass;
  if (RC == &AMDGPU::VReg_64_Align2RegClass ||
      RC == &AMDGPU::AReg_64_Align2RegClass)
    return &AMDGPU::AV_64_Align2RegClass;
  if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
    return &AMDGPU::AV_96RegClass;
  if (RC == &AMDGPU::VReg_96_Align2RegClass ||
      RC == &AMDGPU::AReg_96_Align2RegClass)
    return &AMDGPU::AV_96_Align2RegClass;
  if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
    return &AMDGPU::AV_128RegClass;
  if (RC == &AMDGPU::VReg_128_Align2RegClass ||
      RC == &AMDGPU::AReg_128_Align2RegClass)
    return &AMDGPU::AV_128_Align2RegClass;
  if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
    return &AMDGPU::AV_160RegClass;
  if (RC == &AMDGPU::VReg_160_Align2RegClass ||
      RC == &AMDGPU::AReg_160_Align2RegClass)
    return &AMDGPU::AV_160_Align2RegClass;
  if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
    return &AMDGPU::AV_192RegClass;
  if (RC == &AMDGPU::VReg_192_Align2RegClass ||
      RC == &AMDGPU::AReg_192_Align2RegClass)
    return &AMDGPU::AV_192_Align2RegClass;
  if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
    return &AMDGPU::AV_256RegClass;
  if (RC == &AMDGPU::VReg_256_Align2RegClass ||
      RC == &AMDGPU::AReg_256_Align2RegClass)
    return &AMDGPU::AV_256_Align2RegClass;
  if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
    return &AMDGPU::AV_512RegClass;
  if (RC == &AMDGPU::VReg_512_Align2RegClass ||
      RC == &AMDGPU::AReg_512_Align2RegClass)
    return &AMDGPU::AV_512_Align2RegClass;
  if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
    return &AMDGPU::AV_1024RegClass;
  if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
      RC == &AMDGPU::AReg_1024_Align2RegClass)
    return &AMDGPU::AV_1024_Align2RegClass;
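// Each branch above pairs a VGPR class with the AGPR class of the same width
// and returns the combined AV_* superclass covering both register files; the
// _Align2 variants do the same for the classes that require an even register
// alignment (e.g. on AGPR-bearing targets such as gfx90a).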
  return AMDGPU_AllVGPRs_RegMask;
  return AMDGPU_AllAGPRs_RegMask;
  return AMDGPU_AllVectorRegs_RegMask;
  return AMDGPU_AllAllocatableSRegs_RegMask;
  assert(NumRegIndex && "Not implemented");
  assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
  return SubRegFromChannelTable[NumRegIndex - 1][Channel];
  MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
    unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
  for (auto Reg : AMDGPU::SReg_32RegClass) {
  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    reserveRegisterTuples(Reserved, ScratchRSrcReg);
  reserveRegisterTuples(Reserved, StackPtrReg);
  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
  reserveRegisterTuples(Reserved, FrameReg);
  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
    reserveRegisterTuples(Reserved, BasePtrReg);
    assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
  unsigned MaxNumAGPRs = MaxNumVGPRs;
  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
    for (unsigned i = 0; i < MaxNumAGPRs; ++i) {
      unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
    for (auto Reg : AMDGPU::AGPR_32RegClass) {
      MaxNumAGPRs = MaxNumVGPRs;
      if (MaxNumVGPRs > TotalNumVGPRs) {
        MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
        MaxNumVGPRs = TotalNumVGPRs;
  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
    unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
  for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
    unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
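// The loops above reserve every SGPR, VGPR, and AGPR numbered at or above the
// subtarget's maximum for this function (MaxNumSGPRs / MaxNumVGPRs /
// MaxNumAGPRs), so the allocator only sees the registers the occupancy budget
// allows; on targets where VGPRs and AGPRs share one physical file, the VGPR
// budget is split between the two register kinds.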
  if (Info->isEntryFunction())
  if (Info->isEntryFunction()) {
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();
                                 AMDGPU::OpName::vaddr) ||
                                  AMDGPU::OpName::saddr))) &&
         "Should never see frame index on non-address operand");
    DL = Ins->getDebugLoc();
                     : AMDGPU::V_MOV_B32_e32;
                     : &AMDGPU::VGPR_32RegClass);
                     : &AMDGPU::VGPR_32RegClass);
  bool IsFlat = TII->isFLATScratch(MI);
      TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
                                      : AMDGPU::OpName::vaddr);
  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
         "offset should be legal");
    OffsetOp->setImm(NewOffset);
         "offset should be legal");
  OffsetOp->setImm(NewOffset);
  return &AMDGPU::VGPR_32RegClass;
  if (RC == &AMDGPU::SCC_CLASSRegClass)
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_V1024_SAVE:
  case AMDGPU::SI_SPILL_V1024_RESTORE:
  case AMDGPU::SI_SPILL_A1024_SAVE:
  case AMDGPU::SI_SPILL_A1024_RESTORE:
  case AMDGPU::SI_SPILL_AV1024_SAVE:
  case AMDGPU::SI_SPILL_AV1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
  case AMDGPU::SI_SPILL_A512_SAVE:
  case AMDGPU::SI_SPILL_A512_RESTORE:
  case AMDGPU::SI_SPILL_AV512_SAVE:
  case AMDGPU::SI_SPILL_AV512_RESTORE:
  case AMDGPU::SI_SPILL_S384_SAVE:
  case AMDGPU::SI_SPILL_S384_RESTORE:
  case AMDGPU::SI_SPILL_V384_SAVE:
  case AMDGPU::SI_SPILL_V384_RESTORE:
  case AMDGPU::SI_SPILL_A384_SAVE:
  case AMDGPU::SI_SPILL_A384_RESTORE:
  case AMDGPU::SI_SPILL_AV384_SAVE:
  case AMDGPU::SI_SPILL_AV384_RESTORE:
  case AMDGPU::SI_SPILL_S352_SAVE:
  case AMDGPU::SI_SPILL_S352_RESTORE:
  case AMDGPU::SI_SPILL_V352_SAVE:
  case AMDGPU::SI_SPILL_V352_RESTORE:
  case AMDGPU::SI_SPILL_A352_SAVE:
  case AMDGPU::SI_SPILL_A352_RESTORE:
  case AMDGPU::SI_SPILL_AV352_SAVE:
  case AMDGPU::SI_SPILL_AV352_RESTORE:
  case AMDGPU::SI_SPILL_S320_SAVE:
  case AMDGPU::SI_SPILL_S320_RESTORE:
  case AMDGPU::SI_SPILL_V320_SAVE:
  case AMDGPU::SI_SPILL_V320_RESTORE:
  case AMDGPU::SI_SPILL_A320_SAVE:
  case AMDGPU::SI_SPILL_A320_RESTORE:
  case AMDGPU::SI_SPILL_AV320_SAVE:
  case AMDGPU::SI_SPILL_AV320_RESTORE:
  case AMDGPU::SI_SPILL_S288_SAVE:
  case AMDGPU::SI_SPILL_S288_RESTORE:
  case AMDGPU::SI_SPILL_V288_SAVE:
  case AMDGPU::SI_SPILL_V288_RESTORE:
  case AMDGPU::SI_SPILL_A288_SAVE:
  case AMDGPU::SI_SPILL_A288_RESTORE:
  case AMDGPU::SI_SPILL_AV288_SAVE:
  case AMDGPU::SI_SPILL_AV288_RESTORE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_A256_SAVE:
  case AMDGPU::SI_SPILL_A256_RESTORE:
  case AMDGPU::SI_SPILL_AV256_SAVE:
  case AMDGPU::SI_SPILL_AV256_RESTORE:
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_V224_SAVE:
  case AMDGPU::SI_SPILL_V224_RESTORE:
  case AMDGPU::SI_SPILL_A224_SAVE:
  case AMDGPU::SI_SPILL_A224_RESTORE:
  case AMDGPU::SI_SPILL_AV224_SAVE:
  case AMDGPU::SI_SPILL_AV224_RESTORE:
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_V192_SAVE:
  case AMDGPU::SI_SPILL_V192_RESTORE:
  case AMDGPU::SI_SPILL_A192_SAVE:
  case AMDGPU::SI_SPILL_A192_RESTORE:
  case AMDGPU::SI_SPILL_AV192_SAVE:
  case AMDGPU::SI_SPILL_AV192_RESTORE:
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_V160_SAVE:
  case AMDGPU::SI_SPILL_V160_RESTORE:
  case AMDGPU::SI_SPILL_A160_SAVE:
  case AMDGPU::SI_SPILL_A160_RESTORE:
  case AMDGPU::SI_SPILL_AV160_SAVE:
  case AMDGPU::SI_SPILL_AV160_RESTORE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_A128_SAVE:
  case AMDGPU::SI_SPILL_A128_RESTORE:
  case AMDGPU::SI_SPILL_AV128_SAVE:
  case AMDGPU::SI_SPILL_AV128_RESTORE:
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_A96_SAVE:
  case AMDGPU::SI_SPILL_A96_RESTORE:
  case AMDGPU::SI_SPILL_AV96_SAVE:
  case AMDGPU::SI_SPILL_AV96_RESTORE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_A64_SAVE:
  case AMDGPU::SI_SPILL_A64_RESTORE:
  case AMDGPU::SI_SPILL_AV64_SAVE:
  case AMDGPU::SI_SPILL_AV64_RESTORE:
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_A32_SAVE:
  case AMDGPU::SI_SPILL_A32_RESTORE:
  case AMDGPU::SI_SPILL_AV32_SAVE:
  case AMDGPU::SI_SPILL_AV32_RESTORE:
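// Each size group above is handled identically: the spill pseudo transfers
// its width in 32-bit sub-registers (32 lanes for the 1024-bit pseudos, 16
// for 512-bit, and so on down to a single lane for the 32-bit variants).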
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
  case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
    return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
  case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
    return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
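// The four switches above translate MUBUF loads and stores between their
// _OFFEN form (address taken from a VGPR plus an immediate) and their _OFFSET
// form (immediate offset only), in both directions, so frame-index lowering
// can pick whichever addressing mode the final offset allows.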
                                                    int Index, unsigned Lane,
                                                    unsigned ValueReg, bool IsKill) {
  if (Reg == AMDGPU::NoRegister)
  bool IsStore = MI->mayStore();
  unsigned Dst = IsStore ? Reg : ValueReg;
  unsigned Src = IsStore ? ValueReg : Reg;
  if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
  unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
                                    : AMDGPU::V_ACCVGPR_READ_B32_e64;
  bool IsStore = MI->mayStore();
  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
  if (LoadStoreOp == -1)
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
                                            AMDGPU::OpName::vdata_in);
    NewMI.add(*VDataIn);
                                             unsigned LoadStoreOp,
  bool IsStore = TII->get(LoadStoreOp).mayStore();
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
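// For flat-scratch spills the access is rewritten to the SADDR form of
// SCRATCH_LOAD/SCRATCH_STORE, and the DWORD, DWORDX2, DWORDX3, or DWORDX4
// variant is chosen from the element size being transferred.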
    unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
  assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
  bool IsStore = Desc->mayStore();
  bool IsFlat = TII->isFLATScratch(LoadStoreOp);
  bool CanClobberSCC = false;
  bool Scavenged = false;
  unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
  unsigned NumSubRegs = RegWidth / EltSize;
  unsigned Size = NumSubRegs * EltSize;
  unsigned RemSize = RegWidth - Size;
  unsigned NumRemSubRegs = RemSize ? 1 : 0;
  int64_t MaterializedOffset = Offset;
  int64_t MaxOffset = Offset + Size + RemSize - EltSize;
  int64_t ScratchOffsetRegDelta = 0;
  if (IsFlat && EltSize > 4) {
    Desc = &TII->get(LoadStoreOp);
         "unexpected VGPR spill offset");
  bool UseVGPROffset = false;
  if (IsFlat && SGPRBase) {
  bool IsOffsetLegal =
  } else if (LiveRegs) {
    if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
      UseVGPROffset = true;
      TmpOffsetVGPR = Reg;
  } else if (!SOffset && CanClobberSCC) {
    if (!ScratchOffsetReg)
    SOffset = ScratchOffsetReg;
    ScratchOffsetRegDelta = Offset;
  if (!IsFlat && !UseVGPROffset)
  if (!UseVGPROffset && !SOffset)
  if (UseVGPROffset) {
    MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
  } else if (ScratchOffsetReg == AMDGPU::NoRegister) {
            .addReg(ScratchOffsetReg)
      Add->getOperand(3).setIsDead();
  if (IsFlat && SOffset == AMDGPU::NoRegister) {
           && "Unexpected vaddr for flat scratch with a FI operand");
    if (UseVGPROffset) {
    Desc = &TII->get(LoadStoreOp);
  for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
       ++i, RegOffset += EltSize) {
    if (i == NumSubRegs) {
      Desc = &TII->get(LoadStoreOp);
    if (!IsFlat && UseVGPROffset) {
      Desc = &TII->get(NewLoadStoreOp);
    if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
      MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
    unsigned NumRegs = EltSize / 4;
    unsigned SOffsetRegState = 0;
    const bool IsLastSubReg = i + 1 == e;
    const bool IsFirstSubReg = i == 0;
    bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
    bool NeedSuperRegImpOperand = e > 1;
    unsigned RemEltSize = EltSize;
      for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
               LaneE = RegOffset / 4;
           Lane >= LaneE; --Lane) {
        bool IsSubReg = e > 1 || EltSize > 4;
        if (!MIB.getInstr())
        if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
          NeedSuperRegDef = false;
        if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
          NeedSuperRegImpOperand = true;
          unsigned State = SrcDstRegState;
          if (!IsLastSubReg || (Lane != LaneE))
            State &= ~RegState::Kill;
          if (!IsFirstSubReg || (Lane != LaneS))
    if (RemEltSize != EltSize) {
      assert(IsFlat && EltSize > 4);
      unsigned NumRegs = RemEltSize / 4;
      Desc = &TII->get(Opc);
    unsigned FinalReg = SubReg;
      if (!TmpIntermediateVGPR) {
                TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
            TmpIntermediateVGPR)
        if (NeedSuperRegDef)
      SubReg = TmpIntermediateVGPR;
    } else if (UseVGPROffset) {
      if (!TmpOffsetVGPR) {
    if (UseVGPROffset) {
    if (SOffset == AMDGPU::NoRegister) {
      if (UseVGPROffset && ScratchOffsetReg) {
        MIB.addReg(ScratchOffsetReg);
      MIB.addReg(SOffset, SOffsetRegState);
    MIB.addImm(Offset + RegOffset)
    MIB.addMemOperand(NewMMO);
    if (!IsAGPR && NeedSuperRegDef)
    if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
    if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
  if (ScratchOffsetRegDelta != 0) {
        .addImm(-ScratchOffsetRegDelta);
                                     bool IsKill) const {
                          : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
                          : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
         "Num of VGPR lanes should be equal to num of SGPRs spilled");
      bool IsFirstSubreg = i == 0;
      bool UseKill = SB.IsKill && IsLastSubreg;
                     SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
      if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))
      for (unsigned i = Offset * PVD.PerVGPR,
        unsigned SuperKillState = 0;
  MI->eraseFromParent();
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
      for (unsigned i = Offset * PVD.PerVGPR,
        bool LastSubReg = (i + 1 == e);
                       SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
  MI->eraseFromParent();
    for (unsigned i = Offset * PVD.PerVGPR,
      unsigned SuperKillState = 0;
    MI = RestoreMBB.end();
    for (unsigned i = Offset * PVD.PerVGPR,
      bool LastSubReg = (i + 1 == e);
  switch (MI->getOpcode()) {
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S384_SAVE:
  case AMDGPU::SI_SPILL_S352_SAVE:
  case AMDGPU::SI_SPILL_S320_SAVE:
  case AMDGPU::SI_SPILL_S288_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S384_RESTORE:
  case AMDGPU::SI_SPILL_S352_RESTORE:
  case AMDGPU::SI_SPILL_S320_RESTORE:
  case AMDGPU::SI_SPILL_S288_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
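// The SAVE and RESTORE groups above cover the SGPR spill pseudos for every
// supported width from 32 to 1024 bits; wider pseudos simply name more 32-bit
// sub-registers to be written to or read from VGPR lanes.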
                                        int SPAdj, unsigned FIOperandNum,
  assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
  int Index = MI->getOperand(FIOperandNum).getIndex();
  switch (MI->getOpcode()) {
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S384_SAVE:
  case AMDGPU::SI_SPILL_S352_SAVE:
  case AMDGPU::SI_SPILL_S320_SAVE:
  case AMDGPU::SI_SPILL_S288_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE: {
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S384_RESTORE:
  case AMDGPU::SI_SPILL_S352_RESTORE:
  case AMDGPU::SI_SPILL_S320_RESTORE:
  case AMDGPU::SI_SPILL_S288_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
  case AMDGPU::SI_SPILL_V1024_SAVE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V384_SAVE:
  case AMDGPU::SI_SPILL_V352_SAVE:
  case AMDGPU::SI_SPILL_V320_SAVE:
  case AMDGPU::SI_SPILL_V288_SAVE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V224_SAVE:
  case AMDGPU::SI_SPILL_V192_SAVE:
  case AMDGPU::SI_SPILL_V160_SAVE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_A1024_SAVE:
  case AMDGPU::SI_SPILL_A512_SAVE:
  case AMDGPU::SI_SPILL_A384_SAVE:
  case AMDGPU::SI_SPILL_A352_SAVE:
  case AMDGPU::SI_SPILL_A320_SAVE:
  case AMDGPU::SI_SPILL_A288_SAVE:
  case AMDGPU::SI_SPILL_A256_SAVE:
  case AMDGPU::SI_SPILL_A224_SAVE:
  case AMDGPU::SI_SPILL_A192_SAVE:
  case AMDGPU::SI_SPILL_A160_SAVE:
  case AMDGPU::SI_SPILL_A128_SAVE:
  case AMDGPU::SI_SPILL_A96_SAVE:
  case AMDGPU::SI_SPILL_A64_SAVE:
  case AMDGPU::SI_SPILL_A32_SAVE:
  case AMDGPU::SI_SPILL_AV1024_SAVE:
  case AMDGPU::SI_SPILL_AV512_SAVE:
  case AMDGPU::SI_SPILL_AV384_SAVE:
  case AMDGPU::SI_SPILL_AV352_SAVE:
  case AMDGPU::SI_SPILL_AV320_SAVE:
  case AMDGPU::SI_SPILL_AV288_SAVE:
  case AMDGPU::SI_SPILL_AV256_SAVE:
  case AMDGPU::SI_SPILL_AV224_SAVE:
  case AMDGPU::SI_SPILL_AV192_SAVE:
  case AMDGPU::SI_SPILL_AV160_SAVE:
  case AMDGPU::SI_SPILL_AV128_SAVE:
  case AMDGPU::SI_SPILL_AV96_SAVE:
  case AMDGPU::SI_SPILL_AV64_SAVE:
  case AMDGPU::SI_SPILL_AV32_SAVE: {
                                            AMDGPU::OpName::vdata);
    assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
                          : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
    auto *MBB = MI->getParent();
        TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
        *MI->memoperands_begin(), RS);
    MI->eraseFromParent();
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_V160_RESTORE:
  case AMDGPU::SI_SPILL_V192_RESTORE:
  case AMDGPU::SI_SPILL_V224_RESTORE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_V288_RESTORE:
  case AMDGPU::SI_SPILL_V320_RESTORE:
  case AMDGPU::SI_SPILL_V352_RESTORE:
  case AMDGPU::SI_SPILL_V384_RESTORE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
  case AMDGPU::SI_SPILL_V1024_RESTORE:
  case AMDGPU::SI_SPILL_A32_RESTORE:
  case AMDGPU::SI_SPILL_A64_RESTORE:
  case AMDGPU::SI_SPILL_A96_RESTORE:
  case AMDGPU::SI_SPILL_A128_RESTORE:
  case AMDGPU::SI_SPILL_A160_RESTORE:
  case AMDGPU::SI_SPILL_A192_RESTORE:
  case AMDGPU::SI_SPILL_A224_RESTORE:
  case AMDGPU::SI_SPILL_A256_RESTORE:
  case AMDGPU::SI_SPILL_A288_RESTORE:
  case AMDGPU::SI_SPILL_A320_RESTORE:
  case AMDGPU::SI_SPILL_A352_RESTORE:
  case AMDGPU::SI_SPILL_A384_RESTORE:
  case AMDGPU::SI_SPILL_A512_RESTORE:
  case AMDGPU::SI_SPILL_A1024_RESTORE:
  case AMDGPU::SI_SPILL_AV32_RESTORE:
  case AMDGPU::SI_SPILL_AV64_RESTORE:
  case AMDGPU::SI_SPILL_AV96_RESTORE:
  case AMDGPU::SI_SPILL_AV128_RESTORE:
  case AMDGPU::SI_SPILL_AV160_RESTORE:
  case AMDGPU::SI_SPILL_AV192_RESTORE:
  case AMDGPU::SI_SPILL_AV224_RESTORE:
  case AMDGPU::SI_SPILL_AV256_RESTORE:
  case AMDGPU::SI_SPILL_AV288_RESTORE:
  case AMDGPU::SI_SPILL_AV320_RESTORE:
  case AMDGPU::SI_SPILL_AV352_RESTORE:
  case AMDGPU::SI_SPILL_AV384_RESTORE:
  case AMDGPU::SI_SPILL_AV512_RESTORE:
  case AMDGPU::SI_SPILL_AV1024_RESTORE: {
                                            AMDGPU::OpName::vdata);
    assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
                          : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
    auto *MBB = MI->getParent();
        TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
        *MI->memoperands_begin(), RS);
    MI->eraseFromParent();
    int64_t Offset = FrameInfo.getObjectOffset(Index);
    if (TII->isFLATScratch(*MI)) {
      assert((int16_t)FIOperandNum ==
             AMDGPU::OpName::saddr));
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
        OffsetOp->setImm(NewOffset);
      unsigned Opc = MI->getOpcode();
                                            AMDGPU::OpName::vdst_in);
      bool TiedVDst = VDstIn != -1 &&
                      MI->getOperand(VDstIn).isReg() &&
                      MI->getOperand(VDstIn).isTied();
        MI->untieRegOperand(VDstIn);
        assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
        MI->tieOperands(NewVDst, NewVDstIn);
      MI->setDesc(TII->get(NewOpc));
    if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
    bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
    if (!Offset && FrameReg && UseSGPR) {
                        : &AMDGPU::VGPR_32RegClass;
    if ((!FrameReg || !Offset) && TmpReg) {
      unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
        MIB.addReg(FrameReg);
    if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
    assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
    if (TmpSReg == FrameReg) {
    bool IsMUBUF = TII->isMUBUF(*MI);
              ? &AMDGPU::SReg_32RegClass
              : &AMDGPU::VGPR_32RegClass;
      bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
                    MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
      Register ResultReg = IsCopy ? MI->getOperand(0).getReg()
        int64_t Offset = FrameInfo.getObjectOffset(Index);
        unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
                                             : AMDGPU::V_LSHRREV_B32_e64;
        if (IsSALU && !LiveSCC)
          Shift.getInstr()->getOperand(3).setIsDead();
        if (IsSALU && LiveSCC) {
          ResultReg = NewDest;
        if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
          const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
                 "Need to reuse carry out register");
            ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
            ConstOffsetReg = MIB.getReg(1);
        if (!MIB || IsSALU) {
          Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
          ResultReg = ScaledReg;
          if (!TmpScaledReg.isValid()) {
        MI->eraseFromParent();
      assert(static_cast<int>(FIOperandNum) ==
                                        AMDGPU::OpName::vaddr));
      auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
      assert((SOffset.isImm() && SOffset.getImm() == 0));
      if (FrameReg != AMDGPU::NoRegister)
        SOffset.ChangeToRegister(FrameReg, false);
      int64_t Offset = FrameInfo.getObjectOffset(Index);
          = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
      int64_t NewOffset = OldImm + Offset;
        MI->eraseFromParent();
    if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
    return &AMDGPU::VReg_64RegClass;
    return &AMDGPU::VReg_96RegClass;
    return &AMDGPU::VReg_128RegClass;
    return &AMDGPU::VReg_160RegClass;
    return &AMDGPU::VReg_192RegClass;
    return &AMDGPU::VReg_224RegClass;
    return &AMDGPU::VReg_256RegClass;
    return &AMDGPU::VReg_288RegClass;
    return &AMDGPU::VReg_320RegClass;
    return &AMDGPU::VReg_352RegClass;
    return &AMDGPU::VReg_384RegClass;
    return &AMDGPU::VReg_512RegClass;
    return &AMDGPU::VReg_1024RegClass;
    return &AMDGPU::VReg_64_Align2RegClass;
    return &AMDGPU::VReg_96_Align2RegClass;
    return &AMDGPU::VReg_128_Align2RegClass;
    return &AMDGPU::VReg_160_Align2RegClass;
    return &AMDGPU::VReg_192_Align2RegClass;
    return &AMDGPU::VReg_224_Align2RegClass;
    return &AMDGPU::VReg_256_Align2RegClass;
    return &AMDGPU::VReg_288_Align2RegClass;
    return &AMDGPU::VReg_320_Align2RegClass;
    return &AMDGPU::VReg_352_Align2RegClass;
    return &AMDGPU::VReg_384_Align2RegClass;
    return &AMDGPU::VReg_512_Align2RegClass;
    return &AMDGPU::VReg_1024_Align2RegClass;
    return &AMDGPU::VReg_1RegClass;
    return &AMDGPU::VGPR_LO16RegClass;
    return &AMDGPU::VGPR_32RegClass;
    return &AMDGPU::AReg_64RegClass;
    return &AMDGPU::AReg_96RegClass;
    return &AMDGPU::AReg_128RegClass;
    return &AMDGPU::AReg_160RegClass;
    return &AMDGPU::AReg_192RegClass;
    return &AMDGPU::AReg_224RegClass;
    return &AMDGPU::AReg_256RegClass;
    return &AMDGPU::AReg_288RegClass;
    return &AMDGPU::AReg_320RegClass;
    return &AMDGPU::AReg_352RegClass;
    return &AMDGPU::AReg_384RegClass;
    return &AMDGPU::AReg_512RegClass;
    return &AMDGPU::AReg_1024RegClass;
    return &AMDGPU::AReg_64_Align2RegClass;
    return &AMDGPU::AReg_96_Align2RegClass;
    return &AMDGPU::AReg_128_Align2RegClass;
    return &AMDGPU::AReg_160_Align2RegClass;
    return &AMDGPU::AReg_192_Align2RegClass;
    return &AMDGPU::AReg_224_Align2RegClass;
    return &AMDGPU::AReg_256_Align2RegClass;
    return &AMDGPU::AReg_288_Align2RegClass;
    return &AMDGPU::AReg_320_Align2RegClass;
    return &AMDGPU::AReg_352_Align2RegClass;
    return &AMDGPU::AReg_384_Align2RegClass;
    return &AMDGPU::AReg_512_Align2RegClass;
    return &AMDGPU::AReg_1024_Align2RegClass;
    return &AMDGPU::AGPR_LO16RegClass;
    return &AMDGPU::AGPR_32RegClass;
    return &AMDGPU::AV_64RegClass;
    return &AMDGPU::AV_96RegClass;
    return &AMDGPU::AV_128RegClass;
    return &AMDGPU::AV_160RegClass;
    return &AMDGPU::AV_192RegClass;
    return &AMDGPU::AV_224RegClass;
    return &AMDGPU::AV_256RegClass;
    return &AMDGPU::AV_288RegClass;
    return &AMDGPU::AV_320RegClass;
    return &AMDGPU::AV_352RegClass;
    return &AMDGPU::AV_384RegClass;
    return &AMDGPU::AV_512RegClass;
    return &AMDGPU::AV_1024RegClass;
    return &AMDGPU::AV_64_Align2RegClass;
    return &AMDGPU::AV_96_Align2RegClass;
    return &AMDGPU::AV_128_Align2RegClass;
    return &AMDGPU::AV_160_Align2RegClass;
    return &AMDGPU::AV_192_Align2RegClass;
    return &AMDGPU::AV_224_Align2RegClass;
    return &AMDGPU::AV_256_Align2RegClass;
    return &AMDGPU::AV_288_Align2RegClass;
    return &AMDGPU::AV_320_Align2RegClass;
    return &AMDGPU::AV_352_Align2RegClass;
    return &AMDGPU::AV_384_Align2RegClass;
    return &AMDGPU::AV_512_Align2RegClass;
    return &AMDGPU::AV_1024_Align2RegClass;
    return &AMDGPU::VGPR_LO16RegClass;
    return &AMDGPU::AV_32RegClass;
    return &AMDGPU::SGPR_LO16RegClass;
    return &AMDGPU::SReg_32RegClass;
    return &AMDGPU::SReg_64RegClass;
    return &AMDGPU::SGPR_96RegClass;
    return &AMDGPU::SGPR_128RegClass;
    return &AMDGPU::SGPR_160RegClass;
    return &AMDGPU::SGPR_192RegClass;
    return &AMDGPU::SGPR_224RegClass;
    return &AMDGPU::SGPR_256RegClass;
    return &AMDGPU::SGPR_288RegClass;
    return &AMDGPU::SGPR_320RegClass;
    return &AMDGPU::SGPR_352RegClass;
    return &AMDGPU::SGPR_384RegClass;
    return &AMDGPU::SGPR_512RegClass;
    return &AMDGPU::SGPR_1024RegClass;
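// The blocks of returns above belong to the per-width class lookup helpers
// for VGPRs, AGPRs, the combined AV superclasses, and SGPRs: each maps a
// requested bit width to the class of that size (plus an Align2 variant where
// one exists), with 16-bit requests served by the *_LO16 classes.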
  if (Reg.isVirtual())
    RC = getPhysRegBaseClass(Reg);
  unsigned Size = getRegSizeInBits(*SRC);
  assert(VRC && "Invalid register class size");
  unsigned Size = getRegSizeInBits(*SRC);
  assert(ARC && "Invalid register class size");
  unsigned Size = getRegSizeInBits(*VRC);
    return &AMDGPU::SGPR_32RegClass;
  assert(SRC && "Invalid register class size");
                                          unsigned SubIdx) const {
      getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
  return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
                                      unsigned SrcSubReg) const {
  return getCommonSubClass(DefRC, SrcRC) != nullptr;
                                   bool ReserveHighestVGPR) const {
  if (ReserveHighestVGPR) {
                                                unsigned EltSize) const {
  assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
  const unsigned RegDWORDs = RegBitWidth / 32;
  const unsigned EltDWORDs = EltSize / 4;
  assert(RegSplitParts.size() + 1 >= EltDWORDs);
  const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
  const unsigned NumParts = RegDWORDs / EltDWORDs;
  return ArrayRef(Parts.data(), NumParts);
  return getSubRegisterClass(SrcRC, MO.getSubReg());
  unsigned SrcSize = getRegSizeInBits(*SrcRC);
  unsigned DstSize = getRegSizeInBits(*DstRC);
  unsigned NewSize = getRegSizeInBits(*NewRC);
  if (SrcSize <= 32 || DstSize <= 32)
  return NewSize <= DstSize || NewSize <= SrcSize;
  switch (RC->getID()) {
    return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VGPR_LO16RegClassID:
  case AMDGPU::VGPR_HI16RegClassID:
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
                                                unsigned Idx) const {
  if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
      Idx == AMDGPU::RegisterPressureSets::AGPR_32)
  if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
  static const int Empty[] = { -1 };
  if (RegPressureIgnoredUnits[RegUnit])
  return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
  return AMDGPU::SGPR30_SGPR31;
  switch (RB.getID()) {
  case AMDGPU::VGPRRegBankID:
  case AMDGPU::VCCRegBankID:
    return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
                    : &AMDGPU::SReg_64_XEXECRegClass;
  case AMDGPU::SGPRRegBankID:
  case AMDGPU::AGPRRegBankID:
  return getAllocatableClass(RC);
  return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
  return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
                  : &AMDGPU::VReg_64RegClass;
  switch ((int)RCID) {
  case AMDGPU::SReg_1RegClassID:
  case AMDGPU::SReg_1_XEXECRegClassID:
    return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
                    : &AMDGPU::SReg_64_XEXECRegClass;
  if (Reg.isVirtual()) {
      if ((S.LaneMask & SubLanes) == SubLanes) {
        V = S.getVNInfoAt(UseIdx);
  assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
                                         AMDGPU::SReg_32RegClass,
                                         AMDGPU::AGPR_32RegClass } ) {
  if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
                                            &AMDGPU::VGPR_32RegClass)) {
  return AMDGPU::NoRegister;
  unsigned Size = getRegSizeInBits(*RC);