#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"
33 "amdgpu-spill-sgpr-to-vgpr",
34 cl::desc(
"Enable spilling VGPRs to SGPRs"),
std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;

// Row index into SubRegFromChannelTable for a given sub-register width in
// 32-bit channels (widths 9-15 are unused).
static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
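// A minimal reading guide, assuming the usual AMDGPU sub-register naming:
//   SubRegFromChannelTable[Width - 1][Channel] -> sub-register index, e.g.
//   SubRegFromChannelTable[0][3] == AMDGPU::sub3       (1 channel at channel 3)
//   SubRegFromChannelTable[1][2] == AMDGPU::sub2_sub3  (2 channels at channel 2)
// SubRegFromChannelTableWidthMap folds the supported widths (1-8 and 16
// channels) onto the table's 9 rows.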
                         MI->getOperand(0).isKill(), Index, RS) {}
    // ...
    if (IsWave32) {
      MovOpc = AMDGPU::S_MOV_B32;
      NotOpc = AMDGPU::S_NOT_B32;
    } else {
      MovOpc = AMDGPU::S_MOV_B64;
      NotOpc = AMDGPU::S_NOT_B64;
    }

    assert(SuperReg != AMDGPU::EXEC && "exec should never spill");
    assert(RS && "Cannot spill SGPR to memory without RegScavenger");
    // ...
        IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
    // ...
      MI->emitError("unhandled SGPR spill to memory");
    // ...
      I->getOperand(2).setIsDead(); // Mark SCC as dead.
      // ...
      I->getOperand(2).setIsDead(); // Mark SCC as dead.
    // ...
      MI->emitError("unhandled SGPR spill to memory");
  assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
         getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
         (getSubRegIndexLaneMask(AMDGPU::lo16) |
          getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
             getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
         "getNumCoveredRegs() will not work with generated subreg masks!");
  RegPressureIgnoredUnits.resize(getNumRegUnits());
  RegPressureIgnoredUnits.set(/* ... */);
  for (auto Reg : AMDGPU::VGPR_HI16RegClass)
    // ...

  static auto InitializeRegSplitPartsOnce = [this]() {
    for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
      unsigned Size = getSubRegIdxSize(Idx);
      // ...
      std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
      unsigned Pos = getSubRegIdxOffset(Idx);
      // ...
      unsigned MaxNumParts = 1024 / Size;
      Vec.resize(MaxNumParts);
      // ...
  static auto InitializeSubRegFromChannelTableOnce = [this]() {
    for (auto &Row : SubRegFromChannelTable)
      Row.fill(AMDGPU::NoSubRegister);
    for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
      unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
      unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
      // ...
      unsigned TableIdx = Width - 1;
      assert(TableIdx < SubRegFromChannelTable.size());
      // ...
      SubRegFromChannelTable[TableIdx][Offset] = Idx;
      // ...

  llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
  llvm::call_once(InitializeSubRegFromChannelTableFlag,
                  InitializeSubRegFromChannelTableOnce);
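// A minimal sketch of what the initialization above produces, assuming the
// generated indices follow the usual sub0, sub0_sub1, ... naming:
// RegSplitParts[EltDWORDs - 1] lists, in channel order, the indices that cut
// a 1024-bit register into EltDWORDs-dword pieces, e.g.
//   RegSplitParts[0] = { sub0, sub1, sub2, ... }       // 32-bit pieces
//   RegSplitParts[1] = { sub0_sub1, sub2_sub3, ... }   // 64-bit pieces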
               : CSR_AMDGPU_SaveList;
  // ...
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
                               : CSR_AMDGPU_SI_Gfx_SaveList;
  // ...
  static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
  return &NoCalleeSavedReg;
  // ...
               : CSR_AMDGPU_RegMask;
  // ...
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
                               : CSR_AMDGPU_SI_Gfx_RegMask;
  // ...
  return CSR_AMDGPU_NoRegs_RegMask;
441 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
442 return &AMDGPU::AV_32RegClass;
443 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
444 return &AMDGPU::AV_64RegClass;
445 if (RC == &AMDGPU::VReg_64_Align2RegClass ||
446 RC == &AMDGPU::AReg_64_Align2RegClass)
447 return &AMDGPU::AV_64_Align2RegClass;
448 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
449 return &AMDGPU::AV_96RegClass;
450 if (RC == &AMDGPU::VReg_96_Align2RegClass ||
451 RC == &AMDGPU::AReg_96_Align2RegClass)
452 return &AMDGPU::AV_96_Align2RegClass;
453 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
454 return &AMDGPU::AV_128RegClass;
455 if (RC == &AMDGPU::VReg_128_Align2RegClass ||
456 RC == &AMDGPU::AReg_128_Align2RegClass)
457 return &AMDGPU::AV_128_Align2RegClass;
458 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
459 return &AMDGPU::AV_160RegClass;
460 if (RC == &AMDGPU::VReg_160_Align2RegClass ||
461 RC == &AMDGPU::AReg_160_Align2RegClass)
462 return &AMDGPU::AV_160_Align2RegClass;
463 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
464 return &AMDGPU::AV_192RegClass;
465 if (RC == &AMDGPU::VReg_192_Align2RegClass ||
466 RC == &AMDGPU::AReg_192_Align2RegClass)
467 return &AMDGPU::AV_192_Align2RegClass;
468 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
469 return &AMDGPU::AV_256RegClass;
470 if (RC == &AMDGPU::VReg_256_Align2RegClass ||
471 RC == &AMDGPU::AReg_256_Align2RegClass)
472 return &AMDGPU::AV_256_Align2RegClass;
473 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
474 return &AMDGPU::AV_512RegClass;
475 if (RC == &AMDGPU::VReg_512_Align2RegClass ||
476 RC == &AMDGPU::AReg_512_Align2RegClass)
477 return &AMDGPU::AV_512_Align2RegClass;
478 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
479 return &AMDGPU::AV_1024RegClass;
480 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
481 RC == &AMDGPU::AReg_1024_Align2RegClass)
482 return &AMDGPU::AV_1024_Align2RegClass;
const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
  return AMDGPU_AllVGPRs_RegMask;
}
const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const {
  return AMDGPU_AllAGPRs_RegMask;
}
const uint32_t *SIRegisterInfo::getAllVectorRegMask() const {
  return AMDGPU_AllVectorRegs_RegMask;
}
const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
  return AMDGPU_AllAllocatableSRegs_RegMask;
}

  assert(NumRegIndex && "Not implemented");
  assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
  return SubRegFromChannelTable[NumRegIndex - 1][Channel];
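// Worked example, assuming the usual naming: a request for NumRegs = 2
// starting at Channel = 2 maps through SubRegFromChannelTableWidthMap[2] == 2,
// so NumRegIndex - 1 == 1 and the lookup yields
//   SubRegFromChannelTable[1][2] == AMDGPU::sub2_sub3.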
  MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
  // ...

  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
  reserveRegisterTuples(Reserved, AMDGPU::M0);

  // Reserve src_vccz, src_execz, src_scc.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);

  // Reserve the memory aperture registers.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);

  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);

  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);

  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);

  // Reserve Trap Handler registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);

  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
    unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    // ...
    reserveRegisterTuples(Reserved, ScratchRSrcReg);
  }
  // ...
  reserveRegisterTuples(Reserved, StackPtrReg);
  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
  // ...
  reserveRegisterTuples(Reserved, FrameReg);
  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
  // ...
  reserveRegisterTuples(Reserved, BasePtrReg);
  assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));

  unsigned MaxNumAGPRs = MaxNumVGPRs;
  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
  // ...
  MaxNumAGPRs = MaxNumVGPRs;
  // ...
  if (MaxNumVGPRs > TotalNumVGPRs) {
    MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
    MaxNumVGPRs = TotalNumVGPRs;
  }

  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
    unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
    unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }
  // ...
  for (MCRegister Reg : AMDGPU::AGPR_32RegClass)
    reserveRegisterTuples(Reserved, Reg);
  // ...
  reserveRegisterTuples(Reserved, Reg);
  // ...
  reserveRegisterTuples(Reserved, Reg);
  // ...
  reserveRegisterTuples(Reserved, Reg);
  // ...
  reserveRegisterTuples(Reserved, Reg);
  if (Info->isEntryFunction())
    // ...
  if (Info->isEntryFunction()) {
    // ...
  }

  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();
  // ...
                                            AMDGPU::OpName::vaddr) ||
  // ...
                                            AMDGPU::OpName::saddr))) &&
         "Should never see frame index on non-address operand");
  // ...
    DL = Ins->getDebugLoc();
  // ...
                           : AMDGPU::V_MOV_B32_e32;
  // ...
                           : &AMDGPU::VGPR_32RegClass);
  // ...
  Register OffsetReg =
      MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  // ...
                           : &AMDGPU::VGPR_32RegClass);
  // ...
  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
  bool IsFlat = TII->isFLATScratch(MI);
  // ...
  MachineOperand *FIOp =
      TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
                                      : AMDGPU::OpName::vaddr);
  // ...
  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
  // ...
         "offset should be legal");
  // ...
  OffsetOp->setImm(NewOffset);
  // ...
         "offset should be legal");
  // ...
  OffsetOp->setImm(NewOffset);
  // ...
  return &AMDGPU::VGPR_32RegClass;
  // ...
  if (RC == &AMDGPU::SCC_CLASSRegClass)
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_V1024_SAVE:
  case AMDGPU::SI_SPILL_V1024_RESTORE:
  case AMDGPU::SI_SPILL_A1024_SAVE:
  case AMDGPU::SI_SPILL_A1024_RESTORE:
  case AMDGPU::SI_SPILL_AV1024_SAVE:
  case AMDGPU::SI_SPILL_AV1024_RESTORE:
    return 32;
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
  case AMDGPU::SI_SPILL_A512_SAVE:
  case AMDGPU::SI_SPILL_A512_RESTORE:
  case AMDGPU::SI_SPILL_AV512_SAVE:
  case AMDGPU::SI_SPILL_AV512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S384_SAVE:
  case AMDGPU::SI_SPILL_S384_RESTORE:
  case AMDGPU::SI_SPILL_V384_SAVE:
  case AMDGPU::SI_SPILL_V384_RESTORE:
  case AMDGPU::SI_SPILL_A384_SAVE:
  case AMDGPU::SI_SPILL_A384_RESTORE:
  case AMDGPU::SI_SPILL_AV384_SAVE:
  case AMDGPU::SI_SPILL_AV384_RESTORE:
    return 12;
  case AMDGPU::SI_SPILL_S352_SAVE:
  case AMDGPU::SI_SPILL_S352_RESTORE:
  case AMDGPU::SI_SPILL_V352_SAVE:
  case AMDGPU::SI_SPILL_V352_RESTORE:
  case AMDGPU::SI_SPILL_A352_SAVE:
  case AMDGPU::SI_SPILL_A352_RESTORE:
  case AMDGPU::SI_SPILL_AV352_SAVE:
  case AMDGPU::SI_SPILL_AV352_RESTORE:
    return 11;
  case AMDGPU::SI_SPILL_S320_SAVE:
  case AMDGPU::SI_SPILL_S320_RESTORE:
  case AMDGPU::SI_SPILL_V320_SAVE:
  case AMDGPU::SI_SPILL_V320_RESTORE:
  case AMDGPU::SI_SPILL_A320_SAVE:
  case AMDGPU::SI_SPILL_A320_RESTORE:
  case AMDGPU::SI_SPILL_AV320_SAVE:
  case AMDGPU::SI_SPILL_AV320_RESTORE:
    return 10;
  case AMDGPU::SI_SPILL_S288_SAVE:
  case AMDGPU::SI_SPILL_S288_RESTORE:
  case AMDGPU::SI_SPILL_V288_SAVE:
  case AMDGPU::SI_SPILL_V288_RESTORE:
  case AMDGPU::SI_SPILL_A288_SAVE:
  case AMDGPU::SI_SPILL_A288_RESTORE:
  case AMDGPU::SI_SPILL_AV288_SAVE:
  case AMDGPU::SI_SPILL_AV288_RESTORE:
    return 9;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_A256_SAVE:
  case AMDGPU::SI_SPILL_A256_RESTORE:
  case AMDGPU::SI_SPILL_AV256_SAVE:
  case AMDGPU::SI_SPILL_AV256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_V224_SAVE:
  case AMDGPU::SI_SPILL_V224_RESTORE:
  case AMDGPU::SI_SPILL_A224_SAVE:
  case AMDGPU::SI_SPILL_A224_RESTORE:
  case AMDGPU::SI_SPILL_AV224_SAVE:
  case AMDGPU::SI_SPILL_AV224_RESTORE:
    return 7;
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_V192_SAVE:
  case AMDGPU::SI_SPILL_V192_RESTORE:
  case AMDGPU::SI_SPILL_A192_SAVE:
  case AMDGPU::SI_SPILL_A192_RESTORE:
  case AMDGPU::SI_SPILL_AV192_SAVE:
  case AMDGPU::SI_SPILL_AV192_RESTORE:
    return 6;
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_V160_SAVE:
  case AMDGPU::SI_SPILL_V160_RESTORE:
  case AMDGPU::SI_SPILL_A160_SAVE:
  case AMDGPU::SI_SPILL_A160_RESTORE:
  case AMDGPU::SI_SPILL_AV160_SAVE:
  case AMDGPU::SI_SPILL_AV160_RESTORE:
    return 5;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_A128_SAVE:
  case AMDGPU::SI_SPILL_A128_RESTORE:
  case AMDGPU::SI_SPILL_AV128_SAVE:
  case AMDGPU::SI_SPILL_AV128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_A96_SAVE:
  case AMDGPU::SI_SPILL_A96_RESTORE:
  case AMDGPU::SI_SPILL_AV96_SAVE:
  case AMDGPU::SI_SPILL_AV96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_A64_SAVE:
  case AMDGPU::SI_SPILL_A64_RESTORE:
  case AMDGPU::SI_SPILL_AV64_SAVE:
  case AMDGPU::SI_SPILL_AV64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_A32_SAVE:
  case AMDGPU::SI_SPILL_A32_RESTORE:
  case AMDGPU::SI_SPILL_AV32_SAVE:
  case AMDGPU::SI_SPILL_AV32_RESTORE:
    return 1;
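// Example: SI_SPILL_V256_SAVE spills a 256-bit tuple, so the pseudo expands
// into eight 32-bit sub-register accesses (return value 8 above), while
// SI_SPILL_S32_SAVE is a single 32-bit access (return value 1).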
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
  // ...
  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
  // ...
  case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
  case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
    return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
  case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
    return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
  // ...
  case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
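// These four helpers translate between the register-offset (OFFEN) and
// immediate-offset (OFFSET) forms of the same MUBUF access, e.g.
//   BUFFER_STORE_DWORD_OFFEN  <->  BUFFER_STORE_DWORD_OFFSET
// They back the MUBUF frame-index lowering below; judging from the check on
// the result, a return value of -1 means the opcode has no counterpart.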
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
                                           MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MI,
                                           int Index, unsigned Lane,
                                           unsigned ValueReg, bool IsKill) {
  // ...
  if (Reg == AMDGPU::NoRegister)
    return MachineInstrBuilder();

  bool IsStore = MI->mayStore();
  // ...
  unsigned Dst = IsStore ? Reg : ValueReg;
  unsigned Src = IsStore ? ValueReg : Reg;
  bool IsVGPR = TRI->isVGPR(MRI, Reg);
  // ...
  if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
    // ...
  }

  unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
                                    : AMDGPU::V_ACCVGPR_READ_B32_e64;
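// Sketch of the idea, assuming a free AGPR (or VGPR) has been assigned to this
// frame-index/lane pair: instead of going through memory, the 32-bit value is
// moved across register files with
//   v_accvgpr_write_b32 aN, vM   ; spill   (V_ACCVGPR_WRITE_B32_e64)
//   v_accvgpr_read_b32  vM, aN   ; reload  (V_ACCVGPR_READ_B32_e64)
// which is why the opcode choice above XORs IsStore with IsVGPR.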
  bool IsStore = MI->mayStore();
  // ...
  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
  if (LoadStoreOp == -1)
    return false;
  // ...
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
  // ...
  const MachineOperand *VDataIn =
      TII->getNamedOperand(*MI, AMDGPU::OpName::vdata_in);
  if (VDataIn)
    NewMI.add(*VDataIn);
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
                                          unsigned LoadStoreOp,
                                          unsigned EltSize) {
  bool IsStore = TII->get(LoadStoreOp).mayStore();
  // ...
  switch (EltSize) {
  case 4:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
    break;
  case 8:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
    break;
  case 12:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
    break;
  case 16:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
    break;
  }
void SIRegisterInfo::buildSpillLoadStore(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
    unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
    MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO,
    RegScavenger *RS, LivePhysRegs *LiveRegs) const {
  assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
  // ...
  bool IsStore = Desc->mayStore();
  bool IsFlat = TII->isFLATScratch(LoadStoreOp);
  // ...
  bool CanClobberSCC = false;
  bool Scavenged = false;
  // ...
  unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
  unsigned NumSubRegs = RegWidth / EltSize;
  unsigned Size = NumSubRegs * EltSize;
  unsigned RemSize = RegWidth - Size;
  unsigned NumRemSubRegs = RemSize ? 1 : 0;
  // ...
  int64_t MaterializedOffset = Offset;
  int64_t MaxOffset = Offset + Size + RemSize - EltSize;
  int64_t ScratchOffsetRegDelta = 0;
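  // Worked example, assuming a flat-scratch spill of a non-AGPR 352-bit tuple
  // (RegWidth = 44 bytes): EltSize = min(44, 16) = 16, so NumSubRegs = 2
  // dwordx4 pieces covering Size = 32 bytes, and RemSize = 12 leaves
  // NumRemSubRegs = 1 trailing dwordx3 piece. For MUBUF (or AGPR) spills,
  // EltSize stays 4 and the value is split into plain dword accesses.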
1328 if (IsFlat && EltSize > 4) {
1330 Desc = &
TII->get(LoadStoreOp);
1337 "unexpected VGPR spill offset");
1344 bool UseVGPROffset =
false;
1351 if (IsFlat && SGPRBase) {
1375 bool IsOffsetLegal =
1390 CanClobberSCC = !RS->
isRegUsed(AMDGPU::SCC);
1391 }
else if (LiveRegs) {
1392 CanClobberSCC = !LiveRegs->
contains(AMDGPU::SCC);
1393 for (
MCRegister Reg : AMDGPU::SGPR_32RegClass) {
1401 if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
1405 UseVGPROffset =
true;
1411 for (
MCRegister Reg : AMDGPU::VGPR_32RegClass) {
1413 TmpOffsetVGPR = Reg;
1420 }
else if (!SOffset && CanClobberSCC) {
1431 if (!ScratchOffsetReg)
1432 ScratchOffsetReg =
FuncInfo->getStackPtrOffsetReg();
1433 SOffset = ScratchOffsetReg;
1434 ScratchOffsetRegDelta =
Offset;
1442 if (!IsFlat && !UseVGPROffset)
1445 if (!UseVGPROffset && !SOffset)
1448 if (UseVGPROffset) {
1450 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR,
Offset);
1451 }
else if (ScratchOffsetReg == AMDGPU::NoRegister) {
1456 .
addReg(ScratchOffsetReg)
1458 Add->getOperand(3).setIsDead();
1464 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1466 &&
"Unexpected vaddr for flat scratch with a FI operand");
1468 if (UseVGPROffset) {
1475 Desc = &
TII->get(LoadStoreOp);
  for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
       ++i, RegOffset += EltSize) {
    if (i == NumSubRegs) {
      // ...
      Desc = &TII->get(LoadStoreOp);
    }

    if (!IsFlat && UseVGPROffset) {
      // ...
      Desc = &TII->get(NewLoadStoreOp);
    }

    if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
      // ...
      MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
    }

    unsigned NumRegs = EltSize / 4;
    // ...
    unsigned SOffsetRegState = 0;
    // ...
    const bool IsLastSubReg = i + 1 == e;
    const bool IsFirstSubReg = i == 0;
    // ...
    bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
    bool NeedSuperRegImpOperand = e > 1;
    // ...
    unsigned RemEltSize = EltSize;
    // ...
    for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
             LaneE = RegOffset / 4;
         Lane >= LaneE; --Lane) {
      bool IsSubReg = e > 1 || EltSize > 4;
      // ...
      if (!MIB.getInstr())
        // ...
      if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS &&
                              IsFirstSubReg)) {
        // ...
        NeedSuperRegDef = false;
      }
      if ((IsSubReg || NeedSuperRegImpOperand) &&
          (IsFirstSubReg || IsLastSubReg)) {
        NeedSuperRegImpOperand = true;
        unsigned State = SrcDstRegState;
        if (!IsLastSubReg || (Lane != LaneE))
          State &= ~RegState::Kill;
        if (!IsFirstSubReg || (Lane != LaneS))
          State &= ~RegState::Define;
        // ...
      }
    }
    // ...
    if (RemEltSize != EltSize) {
      assert(IsFlat && EltSize > 4);
      // ...
      unsigned NumRegs = RemEltSize / 4;
      // ...
      Desc = &TII->get(Opc);
    }

    unsigned FinalReg = SubReg;
    // ...
    if (!TmpIntermediateVGPR) {
      TmpIntermediateVGPR = FuncInfo->getVGPRForAGPRCopy();
      // ...
    }
    // ...
            TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpIntermediateVGPR)
    // ...
    if (NeedSuperRegDef)
      // ...
    SubReg = TmpIntermediateVGPR;
    } else if (UseVGPROffset) {
      // ...
      if (!TmpOffsetVGPR) {
        // ...
      }
    }
    // ...
    if (UseVGPROffset) {
      // ...
    }
    if (SOffset == AMDGPU::NoRegister) {
      // ...
      if (UseVGPROffset && ScratchOffsetReg) {
        MIB.addReg(ScratchOffsetReg);
      }
      // ...
    }
    // ...
    MIB.addReg(SOffset, SOffsetRegState);
    // ...
    MIB.addImm(Offset + RegOffset)
       // ...
    MIB.addMemOperand(NewMMO);

    if (!IsAGPR && NeedSuperRegDef)
      // ...
    if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
      // ...
    }
    if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
      // ...
  }

  if (!IsStore && MI != MBB.end() && MI->isReturn() &&
      // ...
    MIB->tieOperands(0, MIB->getNumOperands() - 1);
  // ...
  if (ScratchOffsetRegDelta != 0) {
    // ...
        .addImm(-ScratchOffsetRegDelta);
  }
                                             bool IsKill) const {
  // ...
                     : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
  // ...
                     : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  // ...

  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    return false;
  // ...
         "Num of VGPR lanes should be equal to num of SGPRs spilled");

  for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
    // ...
    bool IsFirstSubreg = i == 0;
    // ...
    bool UseKill = SB.IsKill && IsLastSubreg;
    // ...
            SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
    // ...
    if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))
      // ...
  }
  // ...
  for (unsigned i = Offset * PVD.PerVGPR,
  // ...
    unsigned SuperKillState = 0;
  // ...
  MI->eraseFromParent();
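// The fast path above keeps SGPR spills on the register file, assuming VGPR
// lanes were reserved for this frame index: each 32-bit piece of the SGPR
// tuple goes into one lane of a reserved VGPR, roughly
//   v_writelane_b32 vSpill, sN, lane   ; spill
//   v_readlane_b32  sN, vSpill, lane   ; restore (see the readlane loop below)
// Only when no lanes are available does the slower spill-to-memory path run.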
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    return false;

  for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
    // ...
  }
  // ...
  for (unsigned i = Offset * PVD.PerVGPR,
  // ...
    bool LastSubReg = (i + 1 == e);
    // ...
            SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
    // ...
  MI->eraseFromParent();
  // ...

  for (unsigned i = Offset * PVD.PerVGPR,
  // ...
    unsigned SuperKillState = 0;
  // ...
  MI = RestoreMBB.end();
  // ...
  for (unsigned i = Offset * PVD.PerVGPR,
  // ...
    bool LastSubReg = (i + 1 == e);
  switch (MI->getOpcode()) {
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S384_SAVE:
  case AMDGPU::SI_SPILL_S352_SAVE:
  case AMDGPU::SI_SPILL_S320_SAVE:
  case AMDGPU::SI_SPILL_S288_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE:
    return spillSGPR(MI, FI, RS, Indexes, LIS, true);
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S384_RESTORE:
  case AMDGPU::SI_SPILL_S352_RESTORE:
  case AMDGPU::SI_SPILL_S320_RESTORE:
  case AMDGPU::SI_SPILL_S288_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
    return restoreSGPR(MI, FI, RS, Indexes, LIS, true);
                                        int SPAdj, unsigned FIOperandNum,
                                        // ...
  assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
  // ...
  int Index = MI->getOperand(FIOperandNum).getIndex();
  // ...
  switch (MI->getOpcode()) {
  // SGPR register spill
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S384_SAVE:
  case AMDGPU::SI_SPILL_S352_SAVE:
  case AMDGPU::SI_SPILL_S320_SAVE:
  case AMDGPU::SI_SPILL_S288_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE: {
  // SGPR register restore
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S384_RESTORE:
  case AMDGPU::SI_SPILL_S352_RESTORE:
  case AMDGPU::SI_SPILL_S320_RESTORE:
  case AMDGPU::SI_SPILL_S288_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
  // VGPR register spill
  case AMDGPU::SI_SPILL_V1024_SAVE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V384_SAVE:
  case AMDGPU::SI_SPILL_V352_SAVE:
  case AMDGPU::SI_SPILL_V320_SAVE:
  case AMDGPU::SI_SPILL_V288_SAVE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V224_SAVE:
  case AMDGPU::SI_SPILL_V192_SAVE:
  case AMDGPU::SI_SPILL_V160_SAVE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_A1024_SAVE:
  case AMDGPU::SI_SPILL_A512_SAVE:
  case AMDGPU::SI_SPILL_A384_SAVE:
  case AMDGPU::SI_SPILL_A352_SAVE:
  case AMDGPU::SI_SPILL_A320_SAVE:
  case AMDGPU::SI_SPILL_A288_SAVE:
  case AMDGPU::SI_SPILL_A256_SAVE:
  case AMDGPU::SI_SPILL_A224_SAVE:
  case AMDGPU::SI_SPILL_A192_SAVE:
  case AMDGPU::SI_SPILL_A160_SAVE:
  case AMDGPU::SI_SPILL_A128_SAVE:
  case AMDGPU::SI_SPILL_A96_SAVE:
  case AMDGPU::SI_SPILL_A64_SAVE:
  case AMDGPU::SI_SPILL_A32_SAVE:
  case AMDGPU::SI_SPILL_AV1024_SAVE:
  case AMDGPU::SI_SPILL_AV512_SAVE:
  case AMDGPU::SI_SPILL_AV384_SAVE:
  case AMDGPU::SI_SPILL_AV352_SAVE:
  case AMDGPU::SI_SPILL_AV320_SAVE:
  case AMDGPU::SI_SPILL_AV288_SAVE:
  case AMDGPU::SI_SPILL_AV256_SAVE:
  case AMDGPU::SI_SPILL_AV224_SAVE:
  case AMDGPU::SI_SPILL_AV192_SAVE:
  case AMDGPU::SI_SPILL_AV160_SAVE:
  case AMDGPU::SI_SPILL_AV128_SAVE:
  case AMDGPU::SI_SPILL_AV96_SAVE:
  case AMDGPU::SI_SPILL_AV64_SAVE:
  case AMDGPU::SI_SPILL_AV32_SAVE: {
    const MachineOperand *VData =
        TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
    assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
           MFI->getStackPtrOffsetReg());
    // ...
                          : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
    auto *MBB = MI->getParent();
    // ...
                       TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
                       *MI->memoperands_begin(), RS);
    // ...
    MI->eraseFromParent();
  // VGPR register restore
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_V160_RESTORE:
  case AMDGPU::SI_SPILL_V192_RESTORE:
  case AMDGPU::SI_SPILL_V224_RESTORE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_V288_RESTORE:
  case AMDGPU::SI_SPILL_V320_RESTORE:
  case AMDGPU::SI_SPILL_V352_RESTORE:
  case AMDGPU::SI_SPILL_V384_RESTORE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
  case AMDGPU::SI_SPILL_V1024_RESTORE:
  case AMDGPU::SI_SPILL_A32_RESTORE:
  case AMDGPU::SI_SPILL_A64_RESTORE:
  case AMDGPU::SI_SPILL_A96_RESTORE:
  case AMDGPU::SI_SPILL_A128_RESTORE:
  case AMDGPU::SI_SPILL_A160_RESTORE:
  case AMDGPU::SI_SPILL_A192_RESTORE:
  case AMDGPU::SI_SPILL_A224_RESTORE:
  case AMDGPU::SI_SPILL_A256_RESTORE:
  case AMDGPU::SI_SPILL_A288_RESTORE:
  case AMDGPU::SI_SPILL_A320_RESTORE:
  case AMDGPU::SI_SPILL_A352_RESTORE:
  case AMDGPU::SI_SPILL_A384_RESTORE:
  case AMDGPU::SI_SPILL_A512_RESTORE:
  case AMDGPU::SI_SPILL_A1024_RESTORE:
  case AMDGPU::SI_SPILL_AV32_RESTORE:
  case AMDGPU::SI_SPILL_AV64_RESTORE:
  case AMDGPU::SI_SPILL_AV96_RESTORE:
  case AMDGPU::SI_SPILL_AV128_RESTORE:
  case AMDGPU::SI_SPILL_AV160_RESTORE:
  case AMDGPU::SI_SPILL_AV192_RESTORE:
  case AMDGPU::SI_SPILL_AV224_RESTORE:
  case AMDGPU::SI_SPILL_AV256_RESTORE:
  case AMDGPU::SI_SPILL_AV288_RESTORE:
  case AMDGPU::SI_SPILL_AV320_RESTORE:
  case AMDGPU::SI_SPILL_AV352_RESTORE:
  case AMDGPU::SI_SPILL_AV384_RESTORE:
  case AMDGPU::SI_SPILL_AV512_RESTORE:
  case AMDGPU::SI_SPILL_AV1024_RESTORE: {
    const MachineOperand *VData =
        TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
    assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
           MFI->getStackPtrOffsetReg());
    // ...
                          : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
    auto *MBB = MI->getParent();
    // ...
                       TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
                       *MI->memoperands_begin(), RS);
    MI->eraseFromParent();
    int64_t Offset = FrameInfo.getObjectOffset(Index);
    // ...
    if (TII->isFLATScratch(*MI)) {
      assert((int16_t)FIOperandNum ==
             AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                        AMDGPU::OpName::saddr));
      // ...
      MachineOperand *OffsetOp =
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
      // ...
      OffsetOp->setImm(NewOffset);
      // ...
      unsigned Opc = MI->getOpcode();
      // ...
      int VDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
      bool TiedVDst = VDstIn != -1 &&
                      MI->getOperand(VDstIn).isReg() &&
                      MI->getOperand(VDstIn).isTied();
      if (TiedVDst)
        MI->untieRegOperand(VDstIn);
      // ...
      assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
      MI->tieOperands(NewVDst, NewVDstIn);
      // ...
      MI->setDesc(TII->get(NewOpc));
      if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
        // ...
      bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);

      if (!Offset && FrameReg && UseSGPR) {
        // ...
      }
      // ...
                                : &AMDGPU::VGPR_32RegClass;
      // ...
      if ((!FrameReg || !Offset) && TmpReg) {
        unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
        // ...
        MIB.addReg(FrameReg);
        // ...
      }
      // ...
          RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
      // ...
      if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
        // ...
      assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
      // ...
      if (TmpSReg == FrameReg) {
        // ...
        if (NeedSaveSCC && !MI->registerDefIsDead(AMDGPU::SCC)) {
      bool IsMUBUF = TII->isMUBUF(*MI);
      // ...
          RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
      // ...
                               ? &AMDGPU::SReg_32RegClass
                               : &AMDGPU::VGPR_32RegClass;
      bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
                    MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
      Register ResultReg = IsCopy ? MI->getOperand(0).getReg()
                                  // ...
      int64_t Offset = FrameInfo.getObjectOffset(Index);
      // ...
      unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
                                           : AMDGPU::V_LSHRREV_B32_e64;
      // ...
      if (IsSALU && !LiveSCC)
        // ...
      if (IsSALU && LiveSCC) {
        // ...
        ResultReg = NewDest;
      }
      // ...
      if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
          nullptr) {
        // ...
        const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
        // ...
               "Need to reuse carry out register");
        // ...
          ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
        // ...
          ConstOffsetReg = MIB.getReg(1);
        // ...
      }

      if (!MIB || IsSALU) {
        // ...
        Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
        // ...
        ResultReg = ScaledReg;
        // ...
        if (!TmpScaledReg.isValid()) {
          // ...
        }
      }

      MI->eraseFromParent();
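      // The shift above rescales the frame register, assuming the usual
      // scratch layout where the SP/FP values are swizzled by the wavefront
      // size: shifting right by ST.getWavefrontSizeLog2() (s_lshr_b32 on the
      // SALU path, v_lshrrev_b32 otherwise) recovers a per-lane byte offset,
      // to which the frame object offset is then added via the no-carry add
      // built just above.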
      assert(static_cast<int>(FIOperandNum) ==
             AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                        AMDGPU::OpName::vaddr));
      // ...
      auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
      assert((SOffset.isImm() && SOffset.getImm() == 0));

      if (FrameReg != AMDGPU::NoRegister)
        SOffset.ChangeToRegister(FrameReg, false);

      int64_t Offset = FrameInfo.getObjectOffset(Index);
      int64_t OldImm =
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
      int64_t NewOffset = OldImm + Offset;
      // ...
      MI->eraseFromParent();
      if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
2532 return &AMDGPU::VReg_64RegClass;
2534 return &AMDGPU::VReg_96RegClass;
2536 return &AMDGPU::VReg_128RegClass;
2538 return &AMDGPU::VReg_160RegClass;
2540 return &AMDGPU::VReg_192RegClass;
2542 return &AMDGPU::VReg_224RegClass;
2544 return &AMDGPU::VReg_256RegClass;
2546 return &AMDGPU::VReg_288RegClass;
2548 return &AMDGPU::VReg_320RegClass;
2550 return &AMDGPU::VReg_352RegClass;
2552 return &AMDGPU::VReg_384RegClass;
2554 return &AMDGPU::VReg_512RegClass;
2556 return &AMDGPU::VReg_1024RegClass;
2564 return &AMDGPU::VReg_64_Align2RegClass;
2566 return &AMDGPU::VReg_96_Align2RegClass;
2568 return &AMDGPU::VReg_128_Align2RegClass;
2570 return &AMDGPU::VReg_160_Align2RegClass;
2572 return &AMDGPU::VReg_192_Align2RegClass;
2574 return &AMDGPU::VReg_224_Align2RegClass;
2576 return &AMDGPU::VReg_256_Align2RegClass;
2578 return &AMDGPU::VReg_288_Align2RegClass;
2580 return &AMDGPU::VReg_320_Align2RegClass;
2582 return &AMDGPU::VReg_352_Align2RegClass;
2584 return &AMDGPU::VReg_384_Align2RegClass;
2586 return &AMDGPU::VReg_512_Align2RegClass;
2588 return &AMDGPU::VReg_1024_Align2RegClass;
2596 return &AMDGPU::VReg_1RegClass;
2598 return &AMDGPU::VGPR_LO16RegClass;
2600 return &AMDGPU::VGPR_32RegClass;
2608 return &AMDGPU::AReg_64RegClass;
2610 return &AMDGPU::AReg_96RegClass;
2612 return &AMDGPU::AReg_128RegClass;
2614 return &AMDGPU::AReg_160RegClass;
2616 return &AMDGPU::AReg_192RegClass;
2618 return &AMDGPU::AReg_224RegClass;
2620 return &AMDGPU::AReg_256RegClass;
2622 return &AMDGPU::AReg_288RegClass;
2624 return &AMDGPU::AReg_320RegClass;
2626 return &AMDGPU::AReg_352RegClass;
2628 return &AMDGPU::AReg_384RegClass;
2630 return &AMDGPU::AReg_512RegClass;
2632 return &AMDGPU::AReg_1024RegClass;
2640 return &AMDGPU::AReg_64_Align2RegClass;
2642 return &AMDGPU::AReg_96_Align2RegClass;
2644 return &AMDGPU::AReg_128_Align2RegClass;
2646 return &AMDGPU::AReg_160_Align2RegClass;
2648 return &AMDGPU::AReg_192_Align2RegClass;
2650 return &AMDGPU::AReg_224_Align2RegClass;
2652 return &AMDGPU::AReg_256_Align2RegClass;
2654 return &AMDGPU::AReg_288_Align2RegClass;
2656 return &AMDGPU::AReg_320_Align2RegClass;
2658 return &AMDGPU::AReg_352_Align2RegClass;
2660 return &AMDGPU::AReg_384_Align2RegClass;
2662 return &AMDGPU::AReg_512_Align2RegClass;
2664 return &AMDGPU::AReg_1024_Align2RegClass;
2672 return &AMDGPU::AGPR_LO16RegClass;
2674 return &AMDGPU::AGPR_32RegClass;
2682 return &AMDGPU::AV_64RegClass;
2684 return &AMDGPU::AV_96RegClass;
2686 return &AMDGPU::AV_128RegClass;
2688 return &AMDGPU::AV_160RegClass;
2690 return &AMDGPU::AV_192RegClass;
2692 return &AMDGPU::AV_224RegClass;
2694 return &AMDGPU::AV_256RegClass;
2696 return &AMDGPU::AV_288RegClass;
2698 return &AMDGPU::AV_320RegClass;
2700 return &AMDGPU::AV_352RegClass;
2702 return &AMDGPU::AV_384RegClass;
2704 return &AMDGPU::AV_512RegClass;
2706 return &AMDGPU::AV_1024RegClass;
2714 return &AMDGPU::AV_64_Align2RegClass;
2716 return &AMDGPU::AV_96_Align2RegClass;
2718 return &AMDGPU::AV_128_Align2RegClass;
2720 return &AMDGPU::AV_160_Align2RegClass;
2722 return &AMDGPU::AV_192_Align2RegClass;
2724 return &AMDGPU::AV_224_Align2RegClass;
2726 return &AMDGPU::AV_256_Align2RegClass;
2728 return &AMDGPU::AV_288_Align2RegClass;
2730 return &AMDGPU::AV_320_Align2RegClass;
2732 return &AMDGPU::AV_352_Align2RegClass;
2734 return &AMDGPU::AV_384_Align2RegClass;
2736 return &AMDGPU::AV_512_Align2RegClass;
2738 return &AMDGPU::AV_1024_Align2RegClass;
2746 return &AMDGPU::VGPR_LO16RegClass;
2748 return &AMDGPU::AV_32RegClass;
2757 return &AMDGPU::SGPR_LO16RegClass;
2759 return &AMDGPU::SReg_32RegClass;
2761 return &AMDGPU::SReg_64RegClass;
2763 return &AMDGPU::SGPR_96RegClass;
2765 return &AMDGPU::SGPR_128RegClass;
2767 return &AMDGPU::SGPR_160RegClass;
2769 return &AMDGPU::SGPR_192RegClass;
2771 return &AMDGPU::SGPR_224RegClass;
2773 return &AMDGPU::SGPR_256RegClass;
2775 return &AMDGPU::SGPR_288RegClass;
2777 return &AMDGPU::SGPR_320RegClass;
2779 return &AMDGPU::SGPR_352RegClass;
2781 return &AMDGPU::SGPR_384RegClass;
2783 return &AMDGPU::SGPR_512RegClass;
2785 return &AMDGPU::SGPR_1024RegClass;
  if (Reg.isVirtual())
    RC = MRI.getRegClass(Reg);
  else
    RC = getPhysRegBaseClass(Reg);
  // ...
  unsigned Size = getRegSizeInBits(*SRC);
  // ...
  assert(VRC && "Invalid register class size");
  // ...
  unsigned Size = getRegSizeInBits(*SRC);
  // ...
  assert(ARC && "Invalid register class size");
  // ...
  unsigned Size = getRegSizeInBits(*VRC);
  // ...
    return &AMDGPU::SGPR_32RegClass;
  // ...
  assert(SRC && "Invalid register class size");
                                             unsigned SubIdx) const {
  // ...
  const TargetRegisterClass *MatchRC =
      getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
  return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
  // ...
                                             unsigned SrcSubReg) const {
  // ...
  return getCommonSubClass(DefRC, SrcRC) != nullptr;
                                             bool ReserveHighestVGPR) const {
  if (ReserveHighestVGPR) {
    // ...
    if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
      return Reg;
  }
  // ...
  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
    return Reg;
                                                  unsigned EltSize) const {
  // ...
  assert(RegBitWidth >= 32 && RegBitWidth <= 1024);

  const unsigned RegDWORDs = RegBitWidth / 32;
  const unsigned EltDWORDs = EltSize / 4;
  assert(RegSplitParts.size() + 1 >= EltDWORDs);

  const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
  const unsigned NumParts = RegDWORDs / EltDWORDs;

  return ArrayRef(Parts.data(), NumParts);
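// Worked example: for a 128-bit register class split with EltSize = 8 bytes,
// RegDWORDs = 4 and EltDWORDs = 2, so the returned ArrayRef holds the first
// two entries of RegSplitParts[1] (with the usual naming, sub0_sub1 and
// sub2_sub3).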
  return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
  // ...
  return getSubRegisterClass(SrcRC, MO.getSubReg());
  // ...
  unsigned SrcSize = getRegSizeInBits(*SrcRC);
  unsigned DstSize = getRegSizeInBits(*DstRC);
  unsigned NewSize = getRegSizeInBits(*NewRC);
  // ...
  if (SrcSize <= 32 || DstSize <= 32)
    return true;

  return NewSize <= DstSize || NewSize <= SrcSize;
  switch (RC->getID()) {
  default:
    return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VGPR_LO16RegClassID:
  case AMDGPU::VGPR_HI16RegClassID:
    // ...
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
    // ...
  }
  // ...
                                              unsigned Idx) const {
  if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
      Idx == AMDGPU::RegisterPressureSets::AGPR_32)
    // ...
  if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
    // ...

  static const int Empty[] = { -1 };

  if (RegPressureIgnoredUnits[RegUnit])
    return Empty;

  return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
  return AMDGPU::SGPR30_SGPR31;
  // ...
  switch (RB.getID()) {
  case AMDGPU::VGPRRegBankID:
    // ...
  case AMDGPU::VCCRegBankID:
    // ...
    return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
                    : &AMDGPU::SReg_64_XEXECRegClass;
  case AMDGPU::SGPRRegBankID:
    // ...
  case AMDGPU::AGPRRegBankID:
    // ...
  }
  // ...
  return getAllocatableClass(RC);
  // ...
  return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
  // ...
  return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
  // ...
                  : &AMDGPU::VReg_64RegClass;
  // ...
  switch ((int)RCID) {
  case AMDGPU::SReg_1RegClassID:
    // ...
  case AMDGPU::SReg_1_XEXECRegClassID:
    return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
                    : &AMDGPU::SReg_64_XEXECRegClass;
  // ...
  default:
    return AMDGPUGenRegisterInfo::getRegClass(RCID);
  }
  if (Reg.isVirtual()) {
    // ...
                        : MRI.getMaxLaneMaskForVReg(Reg);
    // ...
      if ((S.LaneMask & SubLanes) == SubLanes) {
        V = S.getVNInfoAt(UseIdx);
        // ...
      }
    // ...
  }
  // ...
  if (!Def || !MDT.dominates(Def, &Use))
    return nullptr;

  assert(Def->modifiesRegister(Reg, this));

  return Def;
  // ...

  assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);

  for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
                                         AMDGPU::SReg_32RegClass,
                                         AMDGPU::AGPR_32RegClass } ) {
    if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
      return Super;
  }

  if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
                                            &AMDGPU::VGPR_32RegClass)) {
    return Super;
  }

  return AMDGPU::NoRegister;
  // ...
  unsigned Size = getRegSizeInBits(*RC);
  // ...
  return std::min(128u, getSubRegIdxSize(SubReg));
  // ...
  return std::min(32u, getSubRegIdxSize(SubReg));
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
typename CallsiteContextGraph< DerivedCCG, FuncTy, CallTy >::FuncInfo FuncInfo
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static int getOffenMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyAGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFLoad(unsigned Opc)
static const std::array< unsigned, 17 > SubRegFromChannelTableWidthMap
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling VGPRs to SGPRs"), cl::ReallyHidden, cl::init(true))
static const TargetRegisterClass * getAlignedAGPRClassForBitWidth(unsigned BitWidth)
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize)
static const TargetRegisterClass * getAlignedVGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyVGPRClassForBitWidth(unsigned BitWidth)
static unsigned getNumSubRegsForSpillOp(unsigned Op)
static const TargetRegisterClass * getAlignedVectorSuperClassForBitWidth(unsigned BitWidth)
static const TargetRegisterClass * getAnyVectorSuperClassForBitWidth(unsigned BitWidth)
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, bool IsKill)
static int getOffenMUBUFLoad(unsigned Opc)
Interface definition for SIRegisterInfo.
static const char * getRegisterName(MCRegister Reg)
uint32_t getLDSSize() const
bool isEntryFunction() const
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
unsigned getWavefrontSizeLog2() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
const SIInstrInfo * getInstrInfo() const override
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool enableFlatScratch() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
const SIFrameLowering * getFrameLowering() const override
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasFlatScratchSTMode() const
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasSubRanges() const
Returns true if subregister liveness information is available.
iterator_range< subrange_iterator > subranges()
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
bool hasInterval(Register Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
A set of physical registers with utility functions to track liveness when walking backward/forward th...
bool contains(MCPhysReg Reg) const
Returns true if register Reg is contained in the set.
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
This class represents the liveness of a register, stack slot, etc.
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Describe properties that are true of each instruction in the target description file.
MCRegAliasIterator enumerates all registers aliasing Reg.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
Wrapper class representing physical registers. Should be passed by value.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasCalls() const
Return true if the current function has any function calls.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
unsigned getNumFixedObjects() const
Return the number of fixed objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
void setAsmPrinterFlag(uint8_t Flag)
Set a flag for the AsmPrinter.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
void setImm(int64_t immVal)
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
Register scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available and do the appropriate bookkeeping.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void assignRegToScavengingIndex(int FI, Register Reg, MachineInstr *Restore=nullptr)
Record that Reg is in use at scavenging index FI.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
Holds all the information related to register banks.
virtual bool isDivergentRegBank(const RegisterBank *RB) const
Returns true if the register bank is considered divergent.
RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
static bool isFLATScratch(const MachineInstr &MI)
static bool isLegalMUBUFImmOffset(unsigned Imm)
static bool isMUBUF(const MachineInstr &MI)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool usesAGPRs(const MachineFunction &MF) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
Register getStackPtrOffsetReg() const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Register getVGPRForAGPRCopy() const
Register getFrameOffsetReg() const
void addToSpilledVGPRs(unsigned num)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVGPRLanes(int FrameIndex) const
const ReservedRegSet & getWWMReservedRegs() const
ArrayRef< Register > getSGPRSpillVGPRs() const
void addToSpilledSGPRs(unsigned num)
Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, int64_t Offset) const override
int64_t getScratchInstrOffset(const MachineInstr *MI) const
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const override
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
ArrayRef< MCPhysReg > getAllSGPR64(const MachineFunction &MF) const
Return all SGPR64 which satisfy the waves per execution unit requirement of the subtarget.
MCRegister findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF, bool ReserveHighestVGPR=false) const
Returns a lowest register that is not used at any point in the function.
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool shouldRealignStack(const MachineFunction &MF) const override
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Register getFrameRegister(const MachineFunction &MF) const override
LLVM_READONLY const TargetRegisterClass * getVectorSuperClassForBitWidth(unsigned BitWidth) const
bool spillEmergencySGPR(MachineBasicBlock::iterator MI, MachineBasicBlock &RestoreMBB, Register SGPR, RegScavenger *RS) const
SIRegisterInfo(const GCNSubtarget &ST)
const uint32_t * getAllVGPRRegMask() const
MCRegister getReturnAddressReg(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
bool hasBasePointer(const MachineFunction &MF) const
const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override
Returns a legal register class to copy a register in the specified class to or from.
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
ArrayRef< MCPhysReg > getAllSGPR32(const MachineFunction &MF) const
Return all SGPR32 which satisfy the waves per execution unit requirement of the subtarget.
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed.
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const override
LLVM_READONLY const TargetRegisterClass * getAGPRClassForBitWidth(unsigned BitWidth) const
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register Reg) const override
const uint32_t * getNoPreservedMask() const override
StringRef getRegAsmName(MCRegister Reg) const override
const uint32_t * getAllAllocatableSRegMask() const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
const uint32_t * getAllVectorRegMask() const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const
bool opCanUseLiteralConstant(unsigned OpType) const
Register getBaseRegister() const
LLVM_READONLY const TargetRegisterClass * getVGPRClassForBitWidth(unsigned BitWidth) const
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isVGPRClass(const TargetRegisterClass *RC)
MachineInstr * findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, MachineRegisterInfo &MRI, LiveIntervals *LIS) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false) const
If OnlyToVGPR is true, this will only succeed if the SGPR can be spilled to reserved VGPR lanes; it will not fall back to spilling through memory.
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
ArrayRef< MCPhysReg > getAllSGPR128(const MachineFunction &MF) const
Return all SGPR128 registers that satisfy the waves per execution unit requirement of the subtarget.
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
const TargetRegisterClass * getRegClassForOperandReg(const MachineRegisterInfo &MRI, const MachineOperand &MO) const
void buildSpillLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO, RegScavenger *RS, LivePhysRegs *LiveRegs=nullptr) const
const uint32_t * getAllAGPRRegMask() const
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr) const
Special case of eliminateFrameIndex.
const TargetRegisterClass * getBoolRC() const
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false) const
MCRegister getExec() const
MCRegister getVCC() const
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
bool isVectorSuperClass(const TargetRegisterClass *RC) const
const TargetRegisterClass * getWaveMaskRegClass() const
unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC, unsigned SubReg) const
void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
const TargetRegisterClass * getVGPR64Class() const
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill=true) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
const int * getRegUnitPressureSets(unsigned RegUnit) const override
SlotIndex - An opaque wrapper around machine indexes.
bool isValid() const
Returns true if this is a valid index.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
SlotIndex replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
ReplaceMachineInstrInMaps - Replace a machine instruction with a new one in the maps used by the register allocator.
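A hedged sketch of how these SlotIndexes helpers are typically used when rewriting an instruction; the function and variable names below are hypothetical, not from this file.
// Hypothetical helper: after rewriting OldMI into NewMI, keep the slot-index
// mapping used by the register allocator consistent with the new instruction.
static void updateIndexes(llvm::MachineInstr &OldMI, llvm::MachineInstr &NewMI,
                          llvm::SlotIndexes *Indexes) {
  if (!Indexes)
    return;                                  // nothing to maintain
  // Give NewMI the index previously held by OldMI rather than a fresh one.
  Indexes->replaceMachineInstrInMaps(OldMI, NewMI);
}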
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null-terminated.
const uint8_t TSFlags
Configurable target specific flags.
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
const MCRegisterClass * MC
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
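A minimal sketch of how the two containment queries relate; RC and the VGPR_32 class are chosen purely for illustration.
// Both calls ask whether every register in RC is also a VGPR_32; they phrase
// the sub-/super-class relationship from opposite ends and always agree.
bool isVGPR32OrSubclass(const llvm::TargetRegisterClass *RC) {
  bool A = llvm::AMDGPU::VGPR_32RegClass.hasSubClassEq(RC);     // RC <= VGPR_32
  bool B = RC->hasSuperClassEq(&llvm::AMDGPU::VGPR_32RegClass); // VGPR_32 >= RC
  return A && B;                                                // A == B
}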
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same spill size.
virtual bool shouldRealignStack(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal calling convention calls for.
A Use represents the edge between a Value definition and its users.
VNInfo - Value Number Information.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode)
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class with ID RCID.
LLVM_READONLY int getFlatScratchInstSVfromSVS(uint16_t Opcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
@ OPERAND_REG_INLINE_AC_FIRST
@ OPERAND_REG_INLINE_AC_LAST
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is not commonly executed.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
auto reverse(ContainerTy &&C)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
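A hedged sketch tying BuildMI together with the register-state helpers above; the opcode, registers, and surrounding parameters are placeholders for illustration, not values from this page.
// Placeholder sketch: emit a 32-bit VALU move, marking the destination as a
// def and the source as killed when IsKill is set.
static void emitMove(llvm::MachineBasicBlock &MBB,
                     llvm::MachineBasicBlock::iterator MI,
                     const llvm::DebugLoc &DL, const llvm::SIInstrInfo *TII,
                     llvm::Register DstReg, llvm::Register SrcReg,
                     bool IsKill) {
  llvm::BuildMI(MBB, MI, DL, TII->get(llvm::AMDGPU::V_MOV_B32_e32))
      .addReg(DstReg, llvm::getDefRegState(true))
      .addReg(SrcReg, llvm::getKillRegState(IsKill));
}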
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
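A minimal sketch of the once-initialization idiom this helper supports; the flag, table, and initializer names are hypothetical.
#include "llvm/Support/Threading.h" // llvm::call_once, llvm::once_flag
#include <array>
#include <cstdint>

// Hypothetical once-initialized lookup table: the initializer runs exactly
// once even if ensureTable() is called concurrently from several threads.
static llvm::once_flag TableInitFlag;
static std::array<int16_t, 32> Table;

static void initTable() {
  for (unsigned I = 0; I != Table.size(); ++I)
    Table[I] = static_cast<int16_t>(I);
}

void ensureTable() {
  llvm::call_once(TableInitFlag, initTable);
}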
constexpr unsigned BitWidth
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t that is less than or equal to Value and is congruent to Skew modulo Align.
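A short worked example for the two alignment helpers above; the numeric values are chosen purely for illustration.
#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>

// alignDown(70, 16) rounds 70 down to the previous multiple of 16, i.e. 64.
uint64_t Rounded = llvm::alignDown(/*Value=*/70, /*Align=*/16);             // 64
// commonAlignment(Align(16), 8) is the alignment still guaranteed after
// adding an 8-byte offset to a 16-byte-aligned base, i.e. Align(8).
llvm::Align Common = llvm::commonAlignment(llvm::Align(16), /*Offset=*/8);  // Align(8)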
This struct is a compact representation of a valid (non-zero power of two) alignment.
This class contains a discriminated union of information about pointers in memory operands, relating them back to the LLVM IR or to virtual locations (such as frame indices).
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
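A hedged sketch of how these MachinePointerInfo helpers combine to describe a spill-slot access; MF, FI, RegOffset, and FrameAlign are illustrative local names, not values from this page.
// Illustrative only: describe a 4-byte store into frame index FI at byte
// offset RegOffset, with the alignment that offset still guarantees.
static llvm::MachineMemOperand *
makeSpillMMO(llvm::MachineFunction &MF, int FI, int64_t RegOffset,
             llvm::Align FrameAlign) {
  llvm::MachinePointerInfo PtrInfo =
      llvm::MachinePointerInfo::getFixedStack(MF, FI).getWithOffset(RegOffset);
  return MF.getMachineMemOperand(PtrInfo, llvm::MachineMemOperand::MOStore,
                                 /*Size=*/4,
                                 llvm::commonAlignment(FrameAlign, RegOffset));
}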
void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI)
ArrayRef< int16_t > SplitParts
SIMachineFunctionInfo & MFI
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, Register Reg, bool IsKill, int Index, RegScavenger *RS)
PerVGPRData getPerVGPRData()
MachineBasicBlock::iterator MI
void readWriteTmpVGPR(unsigned Offset, bool IsLoad)
const SIRegisterInfo & TRI
The llvm::once_flag structure.