28 "amdgpu-spill-sgpr-to-smem",
29 cl::desc(
"Use scalar stores to spill SGPRs if supported by subtarget"),
static bool hasPressureSet(const int *PSets, unsigned PSetID) {
  for (unsigned i = 0; PSets[i] != -1; ++i) {
    if (PSets[i] == (int)PSetID)
      return true;
  }
  return false;
}

void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
                                         BitVector &PressureSets) const {
  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
    const int *PSets = getRegUnitPressureSets(*U);
    if (hasPressureSet(PSets, PSetID)) {
      PressureSets.set(PSetID);
      break;
    }
  }
}

SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST)
  : AMDGPURegisterInfo(),
    SGPRPressureSets(getNumRegPressureSets()),
    VGPRPressureSets(getNumRegPressureSets()) {
  unsigned NumRegPressureSets = getNumRegPressureSets();

  SGPRSetID = NumRegPressureSets;
  VGPRSetID = NumRegPressureSets;

  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
    classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
  }

  // Determine the number of reg units for each pressure set.
  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
    const int *PSets = getRegUnitPressureSets(i);
    for (unsigned j = 0; PSets[j] != -1; ++j) {
      ++PressureSetRegUnits[PSets[j]];
    }
  }

  // Pick the SGPR and VGPR pressure sets that cover the most register units.
  unsigned VGPRMax = 0, SGPRMax = 0;
  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
      VGPRSetID = i;
      VGPRMax = PressureSetRegUnits[i];
      continue;
    }
    if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
      SGPRSetID = i;
      SGPRMax = PressureSetRegUnits[i];
    }
  }

  assert(SGPRSetID < NumRegPressureSets &&
         VGPRSetID < NumRegPressureSets);
}

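// reserveRegisterTuples marks Reg and every register aliasing it (including
// all tuple registers it belongs to) as reserved, so no overlapping tuple can
// be allocated either.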
void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
                                           unsigned Reg) const {
  MCRegAliasIterator R(Reg, this, true);

  for (; R.isValid(); ++R)
    Reserved.set(*R);
}

unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
  const MachineFunction &MF) const {
  // Return the SReg_128 at the end of the SGPR file that is initially reserved
  // for the scratch resource descriptor, in case spilling is needed.
  // ... (BaseIdx = first SGPR of that aligned 4-register group)
  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}

unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
  const MachineFunction &MF) const {
  // ... (Reg = index of the SGPR initially reserved for the scratch wave
  //      offset, in case spilling is needed)
  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
}

BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);

  // EXEC and FLAT_SCR are handled separately from ordinary allocation.
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  // Reserve the trap handler registers; codegen does not use them.
  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);

  // Reserve SGPRs above the limit usable by this function.
  unsigned MaxNumSGPRs = getMaxNumSGPRs(MF);
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
    unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  // Likewise for VGPRs above the limit.
  unsigned MaxNumVGPRs = getMaxNumVGPRs(MF);
  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
    unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
    // Reserve 1 SGPR for the scratch wave offset in case we need to spill.
    reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
  }

  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    // Reserve 4 SGPRs for the scratch resource descriptor in case we need to
    // spill.
    reserveRegisterTuples(Reserved, ScratchRSrcReg);
    assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
  }

  return Reserved;
}

int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
  // ...
  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();
}

int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
                                                 int Idx) const {
  // ...
  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::vaddr) &&
         "Should never see frame index on non-address operand");
  return getMUBUFInstrOffset(MI);
}

bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
  // ...
  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
  // MUBUF offsets are unsigned 12-bit immediates; anything larger needs a
  // frame base register.
  return !isUInt<12>(FullOffset);
}

void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                                  unsigned BaseReg,
                                                  int FrameIdx,
                                                  int64_t Offset) const {
  MachineBasicBlock::iterator Ins = MBB->begin();
  DebugLoc DL; // Defaults to "unknown".

  if (Ins != MBB->end())
    DL = Ins->getDebugLoc();

  // ... (TII, MRI, OffsetReg and FIReg set up here)
  if (Offset == 0) {
    BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
      .addFrameIndex(FrameIdx);
    return;
  }

  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(Offset);
  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
    .addFrameIndex(FrameIdx);

  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(FIReg);
}

void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                                       int64_t Offset) const {
  // ...
  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
  FIOp->ChangeToRegister(BaseReg, false);

  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
  int64_t NewOffset = OffsetOp->getImm() + Offset;
  assert(isUInt<12>(NewOffset) && "offset should be legal");
  OffsetOp->setImm(NewOffset);
}

bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                        unsigned BaseReg,
                                        int64_t Offset) const {
  if (!SIInstrInfo::isMUBUF(*MI))
    return false;

  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
  return isUInt<12>(NewOffset);
}

const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
  const MachineFunction &MF, unsigned Kind) const {
  // This is inaccurate in general (it depends on the address space), but it is
  // correct for the frame-index / private accesses that reach this hook.
  return &AMDGPU::VGPR_32RegClass;
}

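// getNumSubRegsForSpillOp maps an SI spill pseudo-instruction to the number of
// 32-bit sub-registers it covers (e.g. SI_SPILL_S512_* spills sixteen dwords).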
static unsigned getNumSubRegsForSpillOp(unsigned Op) {
  switch (Op) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
    return 1;
  default: llvm_unreachable("Invalid spill opcode");
  }
}

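// The two helpers below translate MUBUF opcodes that take a VGPR address
// (_OFFEN) into the forms that use only the immediate offset (_OFFSET), and
// return -1 when no such variant exists. They are used when a frame index is
// folded away and the VGPR address operand is no longer needed.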
static int getOffsetMUBUFStore(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
  default:
    return -1;
  }
}

static int getOffsetMUBUFLoad(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
  default:
    return -1;
  }
}

static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
                                      MachineFrameInfo &MFI,
                                      MachineBasicBlock::iterator MI,
                                      int Index, int64_t Offset) {
  MachineBasicBlock *MBB = MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = MI->mayStore();

  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
  if (LoadStoreOp == -1)
    return false;

  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
    // ... vdata, srsrc, soffset and the immediate offset/glc/slc/tfe operands
    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  return true;
}

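// buildSpillLoadStore expands a VGPR spill/restore pseudo into a sequence of
// per-dword MUBUF accesses against the scratch resource descriptor. If the
// total offset no longer fits in the 12-bit MUBUF immediate, it either
// scavenges an SGPR to hold the folded offset or temporarily adds the offset
// into the scratch wave offset register and subtracts it again afterwards.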
void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
                                         unsigned LoadStoreOp,
                                         int Index,
                                         unsigned ValueReg,
                                         bool IsKill,
                                         unsigned ScratchRsrcReg,
                                         unsigned ScratchOffsetReg,
                                         int64_t InstOffset,
                                         MachineMemOperand *MMO,
                                         RegScavenger *RS) const {
  // ... (MBB, MF, MFI, TII, DL and the MCInstrDesc Desc are derived from MI
  //      and LoadStoreOp; IsStore = Desc.mayStore())

  bool RanOutOfSGPRs = false;
  bool Scavenged = false;
  unsigned SOffset = ScratchOffsetReg;

  // ... (NumSubRegs = number of 32-bit components of ValueReg)
  unsigned Size = NumSubRegs * 4;
  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
  const int64_t OriginalImmOffset = Offset;

  unsigned Align = MFI.getObjectAlignment(Index);
  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();

  if (!isUInt<12>(Offset + Size)) {
    SOffset = AMDGPU::NoRegister;

    // Try to scavenge a free SGPR to hold the folded-in offset.
    if (RS)
      SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);

    if (SOffset == AMDGPU::NoRegister) {
      // No free SGPR: add the offset into the scratch wave offset register and
      // undo it after the expansion.
      RanOutOfSGPRs = true;
      SOffset = ScratchOffsetReg;
    } else {
      Scavenged = true;
    }

    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
      .addReg(ScratchOffsetReg)
      .addImm(Offset);

    Offset = 0;
  }

  const unsigned EltSize = 4;

  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
    unsigned SubReg = NumSubRegs == 1 ?
      ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));

    unsigned SOffsetRegState = 0;
    if (i + 1 == e)
      SOffsetRegState |= getKillRegState(Scavenged);

    MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
    MachineMemOperand *NewMMO
      = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
                                 EltSize, MinAlign(Align, EltSize * i));

    auto MIB = BuildMI(*MBB, MI, DL, Desc)
      .addReg(SubReg, getDefRegState(!IsStore))
      .addReg(ScratchRsrcReg)
      .addReg(SOffset, SOffsetRegState)
      .addImm(Offset)
      // ... glc/slc/tfe immediates
      .addMemOperand(NewMMO);
    // ... (implicit use of ValueReg when NumSubRegs > 1)
  }

  if (RanOutOfSGPRs) {
    // Subtract the offset we added to the scratch wave offset register.
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
      .addReg(ScratchOffsetReg)
      .addImm(OriginalImmOffset);
  }
}

static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
                                                     bool Store) {
  if (SuperRegSize % 16 == 0) {
    return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
                         AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
  }

  if (SuperRegSize % 8 == 0) {
    return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
                        AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
  }

  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
                      AMDGPU::S_BUFFER_LOAD_DWORD_SGPR };
}

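// spillSGPR lowers SI_SPILL_S*_SAVE. Each 32-bit lane of the SGPR super
// register is either written into a VGPR lane with V_WRITELANE_B32 (the
// default), stored to scratch with a scalar buffer store when spilling to
// SMEM is enabled, or bounced through a temporary VGPR and a SI_SPILL_V32_SAVE
// when no spill VGPR lane is available. M0 is used as the SMEM offset
// register, so it is saved and restored around the spill when it is live.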
void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index, RegScavenger *RS) const {
  // ... (MBB, MF, MRI, ST, TII, MFI and FrameInfo are derived from MI)
  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();
  const DebugLoc &DL = MI->getDebugLoc();

  bool SpillToSMEM = EnableSpillSGPRToSMEM && ST.hasScalarStores();

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM && RS->isRegUsed(AMDGPU::M0)) {
    M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
      .addReg(AMDGPU::M0);
  }

  unsigned ScalarStoreOp;
  unsigned EltSize = 4;
  // ... (for SMEM spills, EltSize/ScalarStoreOp come from getSpillEltSize)

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      int64_t FrOffset = FrameInfo.getObjectOffset(Index);
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      // ... (PtrInfo refers to the fixed stack slot at Index)
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));

      // SMEM only supports a single offset register, so fold the frame offset
      // into M0 (or just copy the wave offset when the offset is zero).
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg());
      }

      BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
        // ... sdata, sbase, soff and glc operands
        .addMemOperand(MMO);
      continue;
    }

    if (Spill.hasReg()) {
      // Default path: write the lane into the reserved spill VGPR.
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
        .addReg(SubReg, getKillRegState(IsKill))
        .addImm(Spill.Lane);
    } else {
      // No spill lane available: copy through a temporary VGPR and store it.
      MachineInstrBuilder Mov
        = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addReg(SubReg, SubKillState);

      // Only the last implicit use of the super register carries "Kill".
      if (NumSubRegs > 1) {
        unsigned SuperKillState = 0;
        // ...
      }

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
        // ... src, frame index, scratch rsrc/offset and offset operands
        .addMemOperand(MMO);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
}

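// restoreSGPR is the mirror image for SI_SPILL_S*_RESTORE: each lane is read
// back with V_READLANE_B32, loaded with a scalar buffer load when SMEM spills
// are enabled, or reloaded via SI_SPILL_V32_RESTORE through a temporary VGPR.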
void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index, RegScavenger *RS) const {
  // ... (MBB, MF, MRI, ST, TII, MFI and FrameInfo are derived from MI)
  const DebugLoc &DL = MI->getDebugLoc();
  unsigned SuperReg = MI->getOperand(0).getReg();

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM && RS->isRegUsed(AMDGPU::M0)) {
    M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
      .addReg(AMDGPU::M0);
  }

  unsigned EltSize = 4;
  unsigned ScalarLoadOp;
  // ... (for SMEM restores, EltSize/ScalarLoadOp come from getSpillEltSize)

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  int64_t FrOffset = FrameInfo.getObjectOffset(Index);

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg());
      }
      // ... scalar buffer load of SubReg, then continue with the next lane
      continue;
    }

    if (Spill.hasReg()) {
      // Read the lane back out of the spill VGPR.
      BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
              SubReg)
        .addReg(Spill.VGPR)
        .addImm(Spill.Lane);
    } else {
      // Reload through a temporary VGPR from the frame index.
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index)
        // ... scratch rsrc/offset and offset operands
        .addMemOperand(MMO);
      // ... copy TmpReg back into SubReg
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
}

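// eliminateFrameIndex is the PEI hook that replaces frame indices in the
// pseudos above. SGPR spill pseudos are forwarded to spillSGPR/restoreSGPR,
// VGPR spill pseudos are expanded through buildSpillLoadStore, and ordinary
// MUBUF accesses get their frame index folded into the immediate offset (or
// materialized into a VGPR when the offset is not a legal immediate).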
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
    // SGPR register spill
    case AMDGPU::SI_SPILL_S512_SAVE:
    case AMDGPU::SI_SPILL_S256_SAVE:
    case AMDGPU::SI_SPILL_S128_SAVE:
    case AMDGPU::SI_SPILL_S64_SAVE:
    case AMDGPU::SI_SPILL_S32_SAVE: {
      spillSGPR(MI, Index, RS);
      break;
    }

    // SGPR register restore
    case AMDGPU::SI_SPILL_S512_RESTORE:
    case AMDGPU::SI_SPILL_S256_RESTORE:
    case AMDGPU::SI_SPILL_S128_RESTORE:
    case AMDGPU::SI_SPILL_S64_RESTORE:
    case AMDGPU::SI_SPILL_S32_RESTORE: {
      restoreSGPR(MI, Index, RS);
      break;
    }

    // VGPR register spill
    case AMDGPU::SI_SPILL_V512_SAVE:
    case AMDGPU::SI_SPILL_V256_SAVE:
    case AMDGPU::SI_SPILL_V128_SAVE:
    case AMDGPU::SI_SPILL_V96_SAVE:
    case AMDGPU::SI_SPILL_V64_SAVE:
    case AMDGPU::SI_SPILL_V32_SAVE: {
      const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                         AMDGPU::OpName::vdata);
      buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
            Index,
            VData->getReg(), VData->isKill(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
            *MI->memoperands_begin(),
            RS);
      MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
      MI->eraseFromParent();
      break;
    }

    // VGPR register restore
    case AMDGPU::SI_SPILL_V32_RESTORE:
    case AMDGPU::SI_SPILL_V64_RESTORE:
    case AMDGPU::SI_SPILL_V96_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
    case AMDGPU::SI_SPILL_V256_RESTORE:
    case AMDGPU::SI_SPILL_V512_RESTORE: {
      const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                         AMDGPU::OpName::vdata);
      buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
            Index,
            VData->getReg(), VData->isKill(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
            *MI->memoperands_begin(),
            RS);
      MI->eraseFromParent();
      break;
    }

    default: {
      if (TII->isMUBUF(*MI)) {
        // Try to fold the frame index into the 12-bit MUBUF immediate offset.
        assert(static_cast<int>(FIOperandNum) ==
               AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::vaddr));

        int64_t Offset = FrameInfo.getObjectOffset(Index);
        int64_t OldImm
          = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
        int64_t NewOffset = OldImm + Offset;

        if (isUInt<12>(NewOffset) &&
            buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
          MI->eraseFromParent();
          break;
        }
      }

      int64_t Offset = FrameInfo.getObjectOffset(Index);
      FIOp.ChangeToImmediate(Offset);
      if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
        unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
        BuildMI(*MBB, MI, MI->getDebugLoc(),
                TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
          .addImm(Offset);
        FIOp.ChangeToRegister(TmpReg, false, false, true);
      }
    }
  }
}

const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
  assert(!TargetRegisterInfo::isVirtualRegister(Reg));

  static const TargetRegisterClass *const BaseClasses[] = {
    &AMDGPU::VGPR_32RegClass,
    &AMDGPU::SReg_32RegClass,
    &AMDGPU::VReg_64RegClass,
    &AMDGPU::SReg_64RegClass,
    &AMDGPU::VReg_96RegClass,
    &AMDGPU::VReg_128RegClass,
    &AMDGPU::SReg_128RegClass,
    &AMDGPU::VReg_256RegClass,
    &AMDGPU::SReg_256RegClass,
    &AMDGPU::VReg_512RegClass,
    &AMDGPU::SReg_512RegClass,
    &AMDGPU::SCC_CLASSRegClass,
  };

  for (const TargetRegisterClass *BaseClass : BaseClasses) {
    if (BaseClass->contains(Reg)) {
      return BaseClass;
    }
  }
  return nullptr;
}

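// hasVGPRs and the getEquivalent*Class helpers below classify register classes
// purely by their size in bytes: 4, 8, 12, 16, 32 and 64 bytes correspond to
// the 32/64/96/128/256/512-bit VGPR and SGPR tuple classes.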
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
  switch (RC->getSize()) {
  case 0: return false;
  case 1: return false;
  case 4:
    return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
  case 8:
    return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
  case 12:
    return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
  case 16:
    return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
  case 32:
    return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
  case 64:
    return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
                                         const TargetRegisterClass *SRC) const {
  switch (SRC->getSize()) {
  case 4:  return &AMDGPU::VGPR_32RegClass;
  case 8:  return &AMDGPU::VReg_64RegClass;
  case 12: return &AMDGPU::VReg_96RegClass;
  case 16: return &AMDGPU::VReg_128RegClass;
  case 32: return &AMDGPU::VReg_256RegClass;
  case 64: return &AMDGPU::VReg_512RegClass;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
                                         const TargetRegisterClass *VRC) const {
  switch (VRC->getSize()) {
  case 4:  return &AMDGPU::SGPR_32RegClass;
  case 8:  return &AMDGPU::SReg_64RegClass;
  case 16: return &AMDGPU::SReg_128RegClass;
  case 32: return &AMDGPU::SReg_256RegClass;
  case 64: return &AMDGPU::SReg_512RegClass;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
                         const TargetRegisterClass *RC, unsigned SubIdx) const {
  if (SubIdx == AMDGPU::NoSubRegister)
    return RC;

  // We can assume that each lane corresponds to one 32-bit register.
  // ... (Count = number of lanes covered by SubIdx)
  if (isSGPRClass(RC)) {
    switch (Count) {
    case 1: return &AMDGPU::SGPR_32RegClass;
    case 2: return &AMDGPU::SReg_64RegClass;
    case 4: return &AMDGPU::SReg_128RegClass;
    case 8: return &AMDGPU::SReg_256RegClass;
    default:
      llvm_unreachable("Invalid sub-register class size");
    }
  } else {
    switch (Count) {
    case 1: return &AMDGPU::VGPR_32RegClass;
    case 2: return &AMDGPU::VReg_64RegClass;
    case 3: return &AMDGPU::VReg_96RegClass;
    case 4: return &AMDGPU::VReg_128RegClass;
    case 8: return &AMDGPU::VReg_256RegClass;
    default:
      llvm_unreachable("Invalid sub-register class size");
    }
  }
}

bool SIRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
                                          unsigned DefSubReg,
                                          const TargetRegisterClass *SrcRC,
                                          unsigned SrcSubReg) const {
  // ...
  return getCommonSubClass(DefRC, SrcRC) != nullptr;
}

unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
                                           enum PreloadedValue Value) const {
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  switch (Value) {
  case SIRegisterInfo::WORKGROUP_ID_X:
    return MFI->WorkGroupIDXSystemSGPR;
  case SIRegisterInfo::WORKGROUP_ID_Y:
    return MFI->WorkGroupIDYSystemSGPR;
  case SIRegisterInfo::WORKGROUP_ID_Z:
    return MFI->WorkGroupIDZSystemSGPR;
  case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
    return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
  case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
    if (ST.isAmdCodeObjectV2(MF)) {
      return MFI->PrivateSegmentBufferUserSGPR;
    }
    return MFI->PrivateMemoryPtrUserSGPR;
  case SIRegisterInfo::KERNARG_SEGMENT_PTR:
    return MFI->KernargSegmentPtrUserSGPR;
  case SIRegisterInfo::DISPATCH_ID:
    return MFI->DispatchIDUserSGPR;
  case SIRegisterInfo::FLAT_SCRATCH_INIT:
    return MFI->FlatScratchInitUserSGPR;
  case SIRegisterInfo::DISPATCH_PTR:
    return MFI->DispatchPtrUserSGPR;
  case SIRegisterInfo::QUEUE_PTR:
    return MFI->QueuePtrUserSGPR;
  case SIRegisterInfo::WORKITEM_ID_X:
    return AMDGPU::VGPR0;
  case SIRegisterInfo::WORKITEM_ID_Y:
    return AMDGPU::VGPR1;
  case SIRegisterInfo::WORKITEM_ID_Z:
    return AMDGPU::VGPR2;
  }
  llvm_unreachable("unexpected preloaded value type");
}

unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
                                            const TargetRegisterClass *RC,
                                            const MachineFunction &MF) const {
  for (unsigned Reg : *RC)
    if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
      return Reg;
  return AMDGPU::NoRegister;
}

unsigned SIRegisterInfo::getMinNumSGPRs(const SISubtarget &ST,
                                        unsigned WavesPerEU) const {
  // Table lookup keyed on the waves-per-EU target; the values differ by
  // subtarget generation.
  switch (WavesPerEU) {
    // ...
  }
}

unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST,
                                        unsigned WavesPerEU,
                                        bool Addressable) const {
  // Table lookup keyed on the waves-per-EU target; the result is clamped to
  // the addressable SGPR count when Addressable is set.
  switch (WavesPerEU) {
    // ...
  }
}

unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const {
  const Function &F = *MF.getFunction();
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Start from the maximum implied by the requested waves-per-EU occupancy.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, false);
  unsigned MaxNumAddressableSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, true);

  // Honor an explicit "amdgpu-num-sgpr" function attribute if present.
  unsigned Requested = AMDGPU::getIntegerAttribute(
    F, "amdgpu-num-sgpr", MaxNumSGPRs);

  // Never request fewer SGPRs than the number of preloaded input SGPRs.
  unsigned NumInputSGPRs = MFI.getNumPreloadedSGPRs();
  if (Requested && Requested < NumInputSGPRs)
    Requested = NumInputSGPRs;

  // Ignore requests that would violate the waves-per-EU constraints.
  if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first, false))
    Requested = 0;
  if (WavesPerEU.second &&
      Requested && Requested < getMinNumSGPRs(ST, WavesPerEU.second))
    Requested = 0;

  if (Requested)
    MaxNumSGPRs = Requested;

  // ... (account for reserved SGPRs and the SGPR init bug)
  return std::min(MaxNumSGPRs - getNumReservedSGPRs(ST, MFI),
                  MaxNumAddressableSGPRs);
}

unsigned SIRegisterInfo::getMinNumVGPRs(unsigned WavesPerEU) const {
  switch (WavesPerEU) {
    // ... VGPR minimums per waves-per-EU
    default: return 129;
  }
}

unsigned SIRegisterInfo::getMaxNumVGPRs(unsigned WavesPerEU) const {
  switch (WavesPerEU) {
    // ... VGPR maximums per waves-per-EU
  }
}

unsigned SIRegisterInfo::getMaxNumVGPRs(const MachineFunction &MF) const {
  const Function &F = *MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);

  // Honor an explicit "amdgpu-num-vgpr" attribute, as long as it stays within
  // the waves-per-EU constraints.
  unsigned Requested = AMDGPU::getIntegerAttribute(
    F, "amdgpu-num-vgpr", MaxNumVGPRs);
  // ...
  if (WavesPerEU.second &&
      Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
    Requested = 0;

  if (Requested)
    MaxNumVGPRs = Requested;

  // ... (subtract VGPRs reserved for the debugger, if any)
  return MaxNumVGPRs;
}

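// getRegSplitParts returns the list of sub-register indices used to split a
// register of class RC into EltSize-byte pieces when spilling: 4-byte splits
// use sub0..sub15, 8-byte splits use the sub0_sub1-style pairs, and 16-byte
// splits use the sub0_sub1_sub2_sub3-style quads.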
ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
                                                   unsigned EltSize) const {
  // 4-byte (dword) split tables.
  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_7[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
  };

  static const int16_t Sub0_3[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
  };

  static const int16_t Sub0_2[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
  };

  static const int16_t Sub0_1[] = {
    AMDGPU::sub0, AMDGPU::sub1,
  };

  // ... (for EltSize == 4, return the table matching the size of RC, or an
  //      empty ArrayRef for a single 32-bit register)

  // 8-byte split tables.
  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
  };

  static const int16_t Sub0_7_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
  };

  static const int16_t Sub0_3_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
  };

  // ... (for EltSize == 8, return the table matching the size of RC)

  assert(EltSize == 16 && "unhandled register spill split size");

  // 16-byte split tables.
  static const int16_t Sub0_15_128[] = {
    AMDGPU::sub0_sub1_sub2_sub3,
    AMDGPU::sub4_sub5_sub6_sub7,
    AMDGPU::sub8_sub9_sub10_sub11,
    AMDGPU::sub12_sub13_sub14_sub15
  };

  static const int16_t Sub0_7_128[] = {
    AMDGPU::sub0_sub1_sub2_sub3,
    AMDGPU::sub4_sub5_sub6_sub7
  };

  // ... (return the table matching the size of RC)
}

const TargetRegisterClass*
SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
                                  unsigned Reg) const {
  if (TargetRegisterInfo::isVirtualRegister(Reg))
    return MRI.getRegClass(Reg);

  return getPhysRegClass(Reg);
}

bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
                            unsigned Reg) const {
  return hasVGPRs(getRegClassForReg(MRI, Reg));
}