21#define DEBUG_TYPE "frame-info"
24 "amdgpu-spill-vgpr-to-agpr",
25 cl::desc(
"Enable spilling VGPRs to AGPRs"),
48 bool Unused =
false) {
51 for (
unsigned i = 0; CSRegs[i]; ++i)
52 LiveRegs.
addReg(CSRegs[i]);
75 unsigned Size =
TRI->getSpillSize(RC);
76 Align Alignment =
TRI->getSpillAlign(RC);
85 int FI = FrameInfo.CreateStackObject(
Size, Alignment,
true,
nullptr,
88 if (
TRI->spillSGPRToVGPR() &&
94 SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));
99 <<
printReg(Spill.VGPR,
TRI) <<
':' << Spill.Lane <<
'\n';);
104 FI = FrameInfo.CreateSpillStackObject(
Size, Alignment);
114 SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
115 LiveRegs.
addReg(ScratchSGPR);
130 int64_t DwordOff = 0) {
131 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
132 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
139 LiveRegs.
addReg(SpillReg);
141 TRI.buildSpillLoadStore(
MBB,
I,
DL, Opc, FI, SpillReg, IsKill, FrameReg,
142 DwordOff, MMO,
nullptr, &LiveRegs);
154 Register FrameReg, int64_t DwordOff = 0) {
155 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
156 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
163 TRI.buildSpillLoadStore(
MBB,
I,
DL, Opc, FI, SpillReg,
false, FrameReg,
164 DwordOff, MMO,
nullptr, &LiveRegs);
174 Register TargetLo =
TRI->getSubReg(TargetReg, AMDGPU::sub0);
175 Register TargetHi =
TRI->getSubReg(TargetReg, AMDGPU::sub1);
196 if (LiveRegs.
empty()) {
230 unsigned EltSize = 4;
232 void saveToMemory(
const int FI)
const {
239 MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
243 for (
unsigned I = 0, DwordOff = 0;
I < NumSubRegs; ++
I) {
251 FI, FrameReg, DwordOff);
256 void saveToVGPRLane(
const int FI)
const {
261 FuncInfo->getPrologEpilogSGPRSpillToVGPRLanes(FI);
264 for (
unsigned I = 0;
I < NumSubRegs; ++
I) {
275 void copyToScratchSGPR(
Register DstReg)
const {
281 void restoreFromMemory(
const int FI) {
286 MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
290 for (
unsigned I = 0, DwordOff = 0;
I < NumSubRegs; ++
I) {
296 FI, FrameReg, DwordOff);
303 void restoreFromVGPRLane(
const int FI) {
306 FuncInfo->getPrologEpilogSGPRSpillToVGPRLanes(FI);
309 for (
unsigned I = 0;
I < NumSubRegs; ++
I) {
319 void copyFromScratchSGPR(
Register SrcReg)
const {
334 ST(MF.getSubtarget<
GCNSubtarget>()), MFI(MF.getFrameInfo()),
336 SuperReg(Reg),
SI(
SI), LiveRegs(LiveRegs),
DL(
DL), FrameReg(FrameReg) {
338 SplitParts =
TRI.getRegSplitParts(RC, EltSize);
339 NumSubRegs = SplitParts.
empty() ? 1 : SplitParts.
size();
341 assert(SuperReg != AMDGPU::M0 &&
"m0 should never spill");
345 switch (
SI.getKind()) {
347 return saveToMemory(
SI.getIndex());
349 return saveToVGPRLane(
SI.getIndex());
351 return copyToScratchSGPR(
SI.getReg());
356 switch (
SI.getKind()) {
358 return restoreFromMemory(
SI.getIndex());
360 return restoreFromVGPRLane(
SI.getIndex());
362 return copyFromScratchSGPR(
SI.getReg());
370void SIFrameLowering::emitEntryFunctionFlatScratchInit(
391 if (
ST.isAmdPalOS()) {
399 Register FlatScrInit = AMDGPU::NoRegister;
402 AllSGPR64s = AllSGPR64s.
slice(
403 std::min(
static_cast<unsigned>(AllSGPR64s.
size()), NumPreloaded));
407 !
TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
412 assert(FlatScrInit &&
"Failed to find free register for scratch init");
414 FlatScrInitLo =
TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
415 FlatScrInitHi =
TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
422 const MCInstrDesc &LoadDwordX2 =
TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
443 And->getOperand(3).setIsDead();
447 assert(FlatScratchInitReg);
450 MRI.addLiveIn(FlatScratchInitReg);
453 FlatScrInitLo =
TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
454 FlatScrInitHi =
TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
458 if (
ST.flatScratchIsPointer()) {
462 .
addReg(ScratchWaveOffsetReg);
470 addReg(FlatScrInitLo).
474 addReg(FlatScrInitHi).
483 .
addReg(ScratchWaveOffsetReg);
503 .
addReg(ScratchWaveOffsetReg);
526Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
539 if (!ScratchRsrcReg || (!
MRI.isPhysRegUsed(ScratchRsrcReg) &&
543 if (
ST.hasSGPRInitBug() ||
544 ScratchRsrcReg !=
TRI->reservedPrivateSegmentBufferReg(MF))
545 return ScratchRsrcReg;
558 AllSGPR128s = AllSGPR128s.
slice(std::min(
static_cast<unsigned>(AllSGPR128s.
size()), NumPreloaded));
567 if (!
MRI.isPhysRegUsed(Reg) &&
MRI.isAllocatable(Reg) &&
568 (!GITPtrLoReg || !
TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
569 MRI.replaceRegWith(ScratchRsrcReg, Reg);
575 return ScratchRsrcReg;
579 return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
584 assert(&MF.
front() == &
MBB &&
"Shrink-wrapping not yet supported");
617 if (!ST.enableFlatScratch())
618 ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
621 if (ScratchRsrcReg) {
623 if (&OtherBB != &
MBB) {
624 OtherBB.addLiveIn(ScratchRsrcReg);
632 if (ST.isAmdHsaOrMesa(
F)) {
633 PreloadedScratchRsrcReg =
635 if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
638 MRI.addLiveIn(PreloadedScratchRsrcReg);
654 if (PreloadedScratchWaveOffsetReg &&
655 TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
658 AllSGPRs = AllSGPRs.
slice(
659 std::min(
static_cast<unsigned>(AllSGPRs.
size()), NumPreloaded));
662 if (!
MRI.isPhysRegUsed(Reg) &&
MRI.isAllocatable(Reg) &&
663 !
TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
664 ScratchWaveOffsetReg = Reg;
671 ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
673 assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
677 assert(SPReg != AMDGPU::SP_REG);
684 assert(FPReg != AMDGPU::FP_REG);
688 bool NeedsFlatScratchInit =
690 (
MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.
hasCalls() ||
693 if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
694 PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
695 MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
699 if (NeedsFlatScratchInit) {
700 emitEntryFunctionFlatScratchInit(MF,
MBB,
I,
DL, ScratchWaveOffsetReg);
703 if (ScratchRsrcReg) {
704 emitEntryFunctionScratchRsrcRegSetup(MF,
MBB,
I,
DL,
705 PreloadedScratchRsrcReg,
706 ScratchRsrcReg, ScratchWaveOffsetReg);
711void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
722 if (ST.isAmdPalOS()) {
725 Register Rsrc01 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
726 Register Rsrc03 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
733 const MCInstrDesc &LoadDwordX4 =
TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
762 }
else if (
ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
766 Register Rsrc2 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
767 Register Rsrc3 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
773 Register Rsrc01 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
782 const MCInstrDesc &LoadDwordX2 =
TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
801 Register Rsrc0 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
802 Register Rsrc1 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
815 .
addImm(Rsrc23 & 0xffffffff)
821 }
else if (
ST.isAmdHsaOrMesa(Fn)) {
822 assert(PreloadedScratchRsrcReg);
824 if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
839 Register ScratchRsrcSub0 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
840 Register ScratchRsrcSub1 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
846 .
addReg(ScratchWaveOffsetReg)
848 auto Addc =
BuildMI(
MBB,
I,
DL,
TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
875 bool EnableInactiveLanes) {
886 MRI, LiveRegs, *
TRI.getWaveMaskRegClass());
887 if (!ScratchExecCopy)
890 LiveRegs.
addReg(ScratchExecCopy);
892 const unsigned SaveExecOpc =
893 ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
894 : AMDGPU::S_OR_SAVEEXEC_B32)
895 : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
896 : AMDGPU::S_OR_SAVEEXEC_B64);
901 return ScratchExecCopy;
918 FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
919 if (!WWMScratchRegs.
empty())
924 auto StoreWWMRegisters =
926 for (
const auto &Reg : WWMRegs) {
934 StoreWWMRegisters(WWMScratchRegs);
935 if (!WWMCalleeSavedRegs.
empty()) {
936 if (ScratchExecCopy) {
937 unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
938 MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
947 StoreWWMRegisters(WWMCalleeSavedRegs);
948 if (ScratchExecCopy) {
950 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
951 MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
954 LiveRegs.
addReg(ScratchExecCopy);
959 for (
const auto &Spill :
FuncInfo->getPrologEpilogSGPRSpills()) {
965 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
977 FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
978 if (!ScratchSGPRs.
empty()) {
985 if (!LiveRegs.
empty()) {
1002 for (
const auto &Spill :
FuncInfo->getPrologEpilogSGPRSpills()) {
1009 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1014 LiveRegs, FrameReg);
1023 FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
1024 if (!WWMScratchRegs.
empty())
1029 auto RestoreWWMRegisters =
1031 for (
const auto &Reg : WWMRegs) {
1033 int FI = Reg.second;
1035 VGPR, FI, FrameReg);
1039 RestoreWWMRegisters(WWMScratchRegs);
1040 if (!WWMCalleeSavedRegs.
empty()) {
1041 if (ScratchExecCopy) {
1042 unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1043 MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
1052 RestoreWWMRegisters(WWMCalleeSavedRegs);
1053 if (ScratchExecCopy) {
1055 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1056 MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
1092 if (
TRI.hasStackRealignment(MF))
1096 if (!HasFP && !
hasFP(MF)) {
1099 FramePtrRegScratchCopy);
1102 Register SGPRForFPSaveRestoreCopy =
1103 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1106 if (SGPRForFPSaveRestoreCopy) {
1113 DL,
TII,
TRI, LiveRegs, FramePtrReg);
1115 LiveRegs.
addReg(SGPRForFPSaveRestoreCopy);
1120 MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass);
1121 if (!FramePtrRegScratchCopy)
1124 LiveRegs.
addReg(FramePtrRegScratchCopy);
1133 RoundedSize += Alignment;
1134 if (LiveRegs.
empty()) {
1149 And->getOperand(3).setIsDead();
1150 FuncInfo->setIsStackRealigned(
true);
1151 }
else if ((HasFP =
hasFP(MF))) {
1160 FramePtrRegScratchCopy);
1161 if (FramePtrRegScratchCopy)
1162 LiveRegs.
removeReg(FramePtrRegScratchCopy);
1169 if ((HasBP =
TRI.hasBasePointer(MF))) {
1175 if (HasFP && RoundedSize != 0) {
1180 Add->getOperand(3).setIsDead();
1183 bool FPSaved =
FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
1185 assert((!HasFP || FPSaved) &&
1186 "Needed to save FP but didn't save it anywhere");
1191 "Saved FP but didn't need it");
1193 bool BPSaved =
FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
1195 assert((!HasBP || BPSaved) &&
1196 "Needed to save BP but didn't save it anywhere");
1198 assert((HasBP || !BPSaved) &&
"Saved BP but didn't need it");
1219 DL =
MBBI->getDebugLoc();
1231 bool FPSaved =
FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
1234 Register SGPRForFPSaveRestoreCopy =
1235 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1242 if (SGPRForFPSaveRestoreCopy) {
1243 LiveRegs.
addReg(SGPRForFPSaveRestoreCopy);
1246 MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass);
1247 if (!FramePtrRegScratchCopy)
1250 LiveRegs.
addReg(FramePtrRegScratchCopy);
1254 FramePtrRegScratchCopy);
1257 if (RoundedSize != 0 &&
hasFP(MF)) {
1262 Add->getOperand(3).setIsDead();
1267 Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
1268 : FramePtrRegScratchCopy;
1272 if (SGPRForFPSaveRestoreCopy)
1277 FramePtrRegScratchCopy);
1289 !
FuncInfo->checkIndexInPrologEpilogSGPRSpills(
I)) {
1319 if (!
FuncInfo->isEntryFunction()) {
1322 FuncInfo->allocateWWMSpill(MF, Reg,
TRI->getSpillSize(*RC),
1323 TRI->getSpillAlign(*RC));
1327 const bool SpillVGPRToAGPR = ST.hasMAIInsts() &&
FuncInfo->hasSpilledVGPRs()
1330 if (SpillVGPRToAGPR) {
1335 bool SeenDbgInstr =
false;
1340 if (
MI.isDebugInstr())
1341 SeenDbgInstr =
true;
1343 if (
TII->isVGPRSpill(
MI)) {
1347 AMDGPU::OpName::vaddr);
1348 int FI =
MI.getOperand(FIOp).getIndex();
1350 TII->getNamedOperand(
MI, AMDGPU::OpName::vdata)->getReg();
1351 if (
FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
1352 TRI->isAGPR(
MRI, VReg))) {
1356 TRI->eliminateFrameIndex(
MI, 0, FIOp, RS);
1363 NonVGPRSpillFIs.
set(FrameIndex);
1369 for (
unsigned FI : SpillFIs.
set_bits())
1370 if (!NonVGPRSpillFIs.
test(FI))
1371 FuncInfo->setVGPRToAGPRSpillDead(FI);
1382 if (!SpillFIs.
empty() && SeenDbgInstr) {
1387 if (
MI.isDebugValue() &&
MI.getOperand(0).isFI() &&
1389 SpillFIs[
MI.getOperand(0).getIndex()]) {
1390 MI.getOperand(0).ChangeToRegister(
Register(),
false );
1400 bool HaveSGPRToVMemSpill =
1401 FuncInfo->removeDeadFrameIndices(MFI,
true);
1403 "SGPR spill should have been removed in SILowerSGPRSpills");
1409 assert(RS &&
"RegScavenger required if spilling");
1416 if (HaveSGPRToVMemSpill &&
1430 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
1437 TRI->findUnusedRegister(
MRI, &AMDGPU::VGPR_32RegClass, MF);
1438 if (UnusedLowVGPR && (
TRI->getHWRegIndex(UnusedLowVGPR) <
1439 TRI->getHWRegIndex(VGPRForAGPRCopy))) {
1443 FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
1444 MRI.freezeReservedRegs(MF);
1462 for (
unsigned I = 0; CSRegs[
I]; ++
I)
1473 const bool WillHaveFP =
1477 if (WillHaveFP ||
hasFP(MF)) {
1480 "Re-reserving spill slot for FP");
1484 if (
TRI->hasBasePointer(MF)) {
1487 "Re-reserving spill slot for BP");
1516 if (
MI.getOpcode() == AMDGPU::V_WRITELANE_B32)
1518 else if (
MI.getOpcode() == AMDGPU::V_READLANE_B32)
1529 if (!ST.hasGFX90AInsts())
1537 SavedVGPRs.
reset(Reg.first);
1562 const BitVector AllSavedRegs = SavedRegs;
1571 const bool WillHaveFP =
1575 if (WillHaveFP ||
hasFP(MF))
1585 Register RetAddrReg =
TRI->getReturnAddressReg(MF);
1587 (FrameInfo.
hasCalls() ||
MRI.isPhysRegModified(RetAddrReg))) {
1588 SavedRegs.
set(
TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
1589 SavedRegs.
set(
TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
1595 std::vector<CalleeSavedInfo> &CSI)
const {
1603 Register BasePtrReg = RI->getBaseRegister();
1604 Register SGPRForFPSaveRestoreCopy =
1605 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1606 Register SGPRForBPSaveRestoreCopy =
1607 FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
1608 if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1611 unsigned NumModifiedRegs = 0;
1613 if (SGPRForFPSaveRestoreCopy)
1615 if (SGPRForBPSaveRestoreCopy)
1618 for (
auto &CS : CSI) {
1619 if (CS.getReg() == FramePtrReg && SGPRForFPSaveRestoreCopy) {
1620 CS.setDstReg(SGPRForFPSaveRestoreCopy);
1621 if (--NumModifiedRegs)
1623 }
else if (CS.getReg() == BasePtrReg && SGPRForBPSaveRestoreCopy) {
1624 CS.setDstReg(SGPRForBPSaveRestoreCopy);
1625 if (--NumModifiedRegs)
1638 uint64_t EstStackSize = MFI.estimateStackSize(MF);
1639 uint64_t MaxOffset = EstStackSize - 1;
1648 if (ST.enableFlatScratch()) {
1665 int64_t Amount =
I->getOperand(0).getImm();
1672 unsigned Opc =
I->getOpcode();
1673 bool IsDestroy = Opc ==
TII->getCallFrameDestroyOpcode();
1674 uint64_t CalleePopAmount = IsDestroy ?
I->getOperand(1).getImm() : 0;
1678 assert(isUInt<32>(Amount) &&
"exceeded stack address space size");
1688 Add->getOperand(3).setIsDead();
1689 }
else if (CalleePopAmount != 0) {
1740 "only expected to call this for entry points");
unsigned const MachineRegisterInfo * MRI
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
Provides AMDGPU specific target descriptions.
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
unsigned const TargetRegisterInfo * TRI
typename CallsiteContextGraph< DerivedCCG, FuncTy, CallTy >::FuncInfo FuncInfo
This file declares the machine register scavenger class.
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static cl::opt< bool > EnableSpillVGPRToAGPR("amdgpu-spill-vgpr-to-agpr", cl::desc("Enable spilling VGPRs to AGPRs"), cl::ReallyHidden, cl::init(true))
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsProlog, bool EnableInactiveLanes)
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, const SIInstrInfo *TII, Register TargetReg)
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI)
Returns true if the frame will require a reference to the stack pointer.
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, const LivePhysRegs &LiveRegs, const TargetRegisterClass &RC)
static void buildEpilogRestore(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static bool allSGPRSpillsAreDead(const MachineFunction &MF)
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, LivePhysRegs &LiveRegs, Register SGPR, const TargetRegisterClass &RC=AMDGPU::SReg_32_XM0_XEXECRegClass)
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, const TargetRegisterClass &RC, bool Unused=false)
static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI, const SIMachineFunctionInfo *FuncInfo, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
static unsigned getScratchScaleFactor(const GCNSubtarget &ST)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isEntryFunction() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
bool test(unsigned Idx) const
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
bool any() const
any - Returns true if any bit is set.
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
iterator_range< const_set_bits_iterator > set_bits() const
bool empty() const
empty - Tests whether there are no bits in this bitvector.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
If the specified machine instruction is a direct store to a stack slot, return the virtual or physica...
unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
A set of physical registers with utility functions to track liveness when walking backward/forward th...
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
void stepBackward(const MachineInstr &MI)
Simulates liveness when stepping backwards over an instruction(bundle).
void removeReg(MCPhysReg Reg)
Removes a physical register, all its sub-registers, and all its super-registers from the set.
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
void addLiveIns(const MachineBasicBlock &MBB)
Adds all live-in registers of basic block MBB.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
void addReg(MCPhysReg Reg)
Adds a physical register and all its sub-registers to the set.
bool empty() const
Returns true if the set is empty.
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
void setIsDead(bool Val=true)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
PrologEpilogSGPRSpillBuilder(Register Reg, const PrologEpilogSGPRSaveRestoreInfo SI, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, const SIInstrInfo *TII, const SIRegisterInfo &TRI, LivePhysRegs &LiveRegs, Register FrameReg)
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Wrapper class representing virtual and physical registers.
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs) const
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
bool allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction &MF) const override
Control the placement of special register scavenging spill slots when allocating a stack frame.
bool requiresStackPointerReference(const MachineFunction &MF) const
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LivePhysRegs &LiveRegs, Register FrameReg, Register FramePtrRegScratchCopy) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LivePhysRegs &LiveRegs, Register FrameReg, Register FramePtrRegScratchCopy) const
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
bool isSupportedStackID(TargetStackID::Value ID) const override
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
static bool isLegalMUBUFImmOffset(unsigned Imm)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const WWMSpillsMap & getWWMSpills() const
unsigned getNumPreloadedSGPRs() const
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
Register getStackPtrOffsetReg() const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
bool hasFlatScratchInit() const
bool hasImplicitBufferPtr() const
ArrayRef< SIRegisterInfo::SpilledReg > getPrologEpilogSGPRSpillToVGPRLanes(int FrameIndex) const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool IsPrologEpilog=false)
bool hasPrologEpilogSGPRSpillEntry(Register Reg) const
Register getGITPtrLoReg(const MachineFunction &MF) const
Register getFrameOffsetReg() const
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
Register getImplicitBufferPtrUserSGPR() const
unsigned getGITPtrHigh() const
bool hasSpilledSGPRs() const
void addToPrologEpilogSGPRSpills(Register Reg, PrologEpilogSGPRSaveRestoreInfo SI)
void setScratchRSrcReg(Register Reg)
Register getFrameRegister(const MachineFunction &MF) const override
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
int64_t getFixed() const
Returns the fixed component of the stack.
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isCompute(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
@ And
Bitwise or logical AND of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
@ PRIVATE_SEGMENT_WAVE_BYTE_OFFSET
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.