21#define DEBUG_TYPE "frame-info"
24 "amdgpu-spill-vgpr-to-agpr",
25 cl::desc(
"Enable spilling VGPRs to AGPRs"),
38 if (!
MRI.isPhysRegUsed(Reg) && LiveUnits.
available(Reg) &&
54 for (
unsigned i = 0; CSRegs[i]; ++i)
55 LiveUnits.
addReg(CSRegs[i]);
75 bool IncludeScratchCopy =
true) {
81 unsigned Size =
TRI->getSpillSize(RC);
82 Align Alignment =
TRI->getSpillAlign(RC);
90 if (IncludeScratchCopy)
94 int FI = FrameInfo.CreateStackObject(
Size, Alignment,
true,
nullptr,
97 if (
TRI->spillSGPRToVGPR() &&
104 SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));
114 FI = FrameInfo.CreateSpillStackObject(
Size, Alignment);
124 SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
125 LiveUnits.
addReg(ScratchSGPR);
140 int64_t DwordOff = 0) {
141 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
142 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
149 LiveUnits.
addReg(SpillReg);
151 TRI.buildSpillLoadStore(
MBB,
I,
DL, Opc, FI, SpillReg, IsKill, FrameReg,
152 DwordOff, MMO,
nullptr, &LiveUnits);
164 Register FrameReg, int64_t DwordOff = 0) {
165 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
166 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
173 TRI.buildSpillLoadStore(
MBB,
I,
DL, Opc, FI, SpillReg,
false, FrameReg,
174 DwordOff, MMO,
nullptr, &LiveUnits);
184 Register TargetLo =
TRI->getSubReg(TargetReg, AMDGPU::sub0);
185 Register TargetHi =
TRI->getSubReg(TargetReg, AMDGPU::sub1);
192 const MCInstrDesc &GetPC64 =
TII->get(AMDGPU::S_GETPC_B64_pseudo);
206 if (LiveUnits.
empty()) {
240 unsigned EltSize = 4;
242 void saveToMemory(
const int FI)
const {
249 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
253 for (
unsigned I = 0, DwordOff = 0;
I < NumSubRegs; ++
I) {
261 FI, FrameReg, DwordOff);
266 void saveToVGPRLane(
const int FI)
const {
272 assert(Spill.size() == NumSubRegs);
274 for (
unsigned I = 0;
I < NumSubRegs; ++
I) {
286 void copyToScratchSGPR(
Register DstReg)
const {
292 void restoreFromMemory(
const int FI) {
297 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
301 for (
unsigned I = 0, DwordOff = 0;
I < NumSubRegs; ++
I) {
307 TmpVGPR, FI, FrameReg, DwordOff);
314 void restoreFromVGPRLane(
const int FI) {
318 assert(Spill.size() == NumSubRegs);
320 for (
unsigned I = 0;
I < NumSubRegs; ++
I) {
330 void copyFromScratchSGPR(
Register SrcReg)
const {
345 ST(MF.getSubtarget<
GCNSubtarget>()), MFI(MF.getFrameInfo()),
347 SuperReg(Reg), SI(SI), LiveUnits(LiveUnits),
DL(
DL),
350 SplitParts =
TRI.getRegSplitParts(RC, EltSize);
351 NumSubRegs = SplitParts.
empty() ? 1 : SplitParts.
size();
353 assert(SuperReg != AMDGPU::M0 &&
"m0 should never spill");
357 switch (SI.getKind()) {
359 return saveToMemory(SI.getIndex());
361 return saveToVGPRLane(SI.getIndex());
363 return copyToScratchSGPR(SI.getReg());
368 switch (SI.getKind()) {
370 return restoreFromMemory(SI.getIndex());
372 return restoreFromVGPRLane(SI.getIndex());
374 return copyFromScratchSGPR(SI.getReg());
382void SIFrameLowering::emitEntryFunctionFlatScratchInit(
403 if (
ST.isAmdPalOS()) {
411 Register FlatScrInit = AMDGPU::NoRegister;
414 AllSGPR64s = AllSGPR64s.
slice(
415 std::min(
static_cast<unsigned>(AllSGPR64s.
size()), NumPreloaded));
419 MRI.isAllocatable(Reg) && !
TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
424 assert(FlatScrInit &&
"Failed to find free register for scratch init");
426 FlatScrInitLo =
TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
427 FlatScrInitHi =
TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
434 const MCInstrDesc &LoadDwordX2 =
TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
455 And->getOperand(3).setIsDead();
459 assert(FlatScratchInitReg);
462 MRI.addLiveIn(FlatScratchInitReg);
465 FlatScrInitLo =
TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
466 FlatScrInitHi =
TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
470 if (
ST.flatScratchIsPointer()) {
474 .
addReg(ScratchWaveOffsetReg);
481 using namespace AMDGPU::Hwreg;
484 .
addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
487 .
addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
494 .
addReg(ScratchWaveOffsetReg);
514 .
addReg(ScratchWaveOffsetReg);
537Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
550 if (!ScratchRsrcReg || (!
MRI.isPhysRegUsed(ScratchRsrcReg) &&
554 if (
ST.hasSGPRInitBug() ||
555 ScratchRsrcReg !=
TRI->reservedPrivateSegmentBufferReg(MF))
556 return ScratchRsrcReg;
569 AllSGPR128s = AllSGPR128s.
slice(std::min(
static_cast<unsigned>(AllSGPR128s.
size()), NumPreloaded));
578 if (!
MRI.isPhysRegUsed(Reg) &&
MRI.isAllocatable(Reg) &&
579 (!GITPtrLoReg || !
TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
580 MRI.replaceRegWith(ScratchRsrcReg, Reg);
587 return ScratchRsrcReg;
591 return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
596 assert(&MF.
front() == &
MBB &&
"Shrink-wrapping not yet supported");
629 if (!ST.enableFlatScratch())
630 ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
633 if (ScratchRsrcReg) {
635 if (&OtherBB != &
MBB) {
636 OtherBB.addLiveIn(ScratchRsrcReg);
644 if (ST.isAmdHsaOrMesa(
F)) {
645 PreloadedScratchRsrcReg =
647 if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
650 MRI.addLiveIn(PreloadedScratchRsrcReg);
666 if (PreloadedScratchWaveOffsetReg &&
667 TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
670 AllSGPRs = AllSGPRs.
slice(
671 std::min(
static_cast<unsigned>(AllSGPRs.
size()), NumPreloaded));
674 if (!
MRI.isPhysRegUsed(Reg) &&
MRI.isAllocatable(Reg) &&
675 !
TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
676 ScratchWaveOffsetReg = Reg;
685 if (!ScratchWaveOffsetReg)
687 "could not find temporary scratch offset register in prolog");
689 ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
691 assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
695 assert(FPReg != AMDGPU::FP_REG);
701 assert(SPReg != AMDGPU::SP_REG);
706 bool NeedsFlatScratchInit =
708 (
MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.
hasCalls() ||
711 if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
712 PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
713 MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
717 if (NeedsFlatScratchInit) {
718 emitEntryFunctionFlatScratchInit(MF,
MBB,
I,
DL, ScratchWaveOffsetReg);
721 if (ScratchRsrcReg) {
722 emitEntryFunctionScratchRsrcRegSetup(MF,
MBB,
I,
DL,
723 PreloadedScratchRsrcReg,
724 ScratchRsrcReg, ScratchWaveOffsetReg);
729void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
740 if (ST.isAmdPalOS()) {
743 Register Rsrc01 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
744 Register Rsrc03 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
751 const MCInstrDesc &LoadDwordX4 =
TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
780 }
else if (
ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
784 Register Rsrc2 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
785 Register Rsrc3 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
791 Register Rsrc01 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
800 const MCInstrDesc &LoadDwordX2 =
TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
819 Register Rsrc0 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
820 Register Rsrc1 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
832 .
addImm(Rsrc23 & 0xffffffff)
838 }
else if (
ST.isAmdHsaOrMesa(Fn)) {
839 assert(PreloadedScratchRsrcReg);
841 if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
856 Register ScratchRsrcSub0 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
857 Register ScratchRsrcSub1 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
863 .
addReg(ScratchWaveOffsetReg)
865 auto Addc =
BuildMI(
MBB,
I,
DL,
TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
892 bool EnableInactiveLanes) {
903 MRI, LiveUnits, *
TRI.getWaveMaskRegClass());
904 if (!ScratchExecCopy)
907 LiveUnits.
addReg(ScratchExecCopy);
909 const unsigned SaveExecOpc =
910 ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
911 : AMDGPU::S_OR_SAVEEXEC_B32)
912 : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
913 : AMDGPU::S_OR_SAVEEXEC_B64);
918 return ScratchExecCopy;
936 if (!WWMScratchRegs.
empty())
941 auto StoreWWMRegisters =
943 for (
const auto &Reg : WWMRegs) {
951 StoreWWMRegisters(WWMScratchRegs);
952 if (!WWMCalleeSavedRegs.
empty()) {
953 if (ScratchExecCopy) {
954 unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
963 StoreWWMRegisters(WWMCalleeSavedRegs);
964 if (ScratchExecCopy) {
966 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
969 LiveUnits.
addReg(ScratchExecCopy);
980 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
985 LiveUnits, FrameReg);
993 if (!ScratchSGPRs.
empty()) {
1000 if (!LiveUnits.
empty()) {
1024 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1029 LiveUnits, FrameReg);
1039 if (!WWMScratchRegs.
empty())
1044 auto RestoreWWMRegisters =
1046 for (
const auto &Reg : WWMRegs) {
1048 int FI = Reg.second;
1050 VGPR, FI, FrameReg);
1054 RestoreWWMRegisters(WWMScratchRegs);
1055 if (!WWMCalleeSavedRegs.
empty()) {
1056 if (ScratchExecCopy) {
1057 unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1066 RestoreWWMRegisters(WWMCalleeSavedRegs);
1067 if (ScratchExecCopy) {
1069 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1105 assert(StackPtrReg != AMDGPU::SP_REG);
1117 if (
TRI.hasStackRealignment(MF))
1121 if (!HasFP && !
hasFP(MF)) {
1125 FramePtrRegScratchCopy);
1128 Register SGPRForFPSaveRestoreCopy =
1132 if (SGPRForFPSaveRestoreCopy) {
1139 DL,
TII,
TRI, LiveUnits, FramePtrReg);
1141 LiveUnits.
addReg(SGPRForFPSaveRestoreCopy);
1146 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1147 if (!FramePtrRegScratchCopy)
1150 LiveUnits.
addReg(FramePtrRegScratchCopy);
1159 RoundedSize += Alignment;
1160 if (LiveUnits.
empty()) {
1175 And->getOperand(3).setIsDead();
1177 }
else if ((HasFP =
hasFP(MF))) {
1186 FramePtrRegScratchCopy);
1187 if (FramePtrRegScratchCopy)
1188 LiveUnits.
removeReg(FramePtrRegScratchCopy);
1195 if ((HasBP =
TRI.hasBasePointer(MF))) {
1201 if (HasFP && RoundedSize != 0) {
1206 Add->getOperand(3).setIsDead();
1211 assert((!HasFP || FPSaved) &&
1212 "Needed to save FP but didn't save it anywhere");
1217 "Saved FP but didn't need it");
1221 assert((!HasBP || BPSaved) &&
1222 "Needed to save BP but didn't save it anywhere");
1224 assert((HasBP || !BPSaved) &&
"Saved BP but didn't need it");
1245 DL =
MBBI->getDebugLoc();
1260 Register SGPRForFPSaveRestoreCopy =
1268 if (SGPRForFPSaveRestoreCopy) {
1269 LiveUnits.
addReg(SGPRForFPSaveRestoreCopy);
1272 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1273 if (!FramePtrRegScratchCopy)
1276 LiveUnits.
addReg(FramePtrRegScratchCopy);
1280 FramePtrRegScratchCopy);
1283 if (RoundedSize != 0 &&
hasFP(MF)) {
1288 Add->getOperand(3).setIsDead();
1293 Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
1294 : FramePtrRegScratchCopy;
1298 if (SGPRForFPSaveRestoreCopy)
1303 FramePtrRegScratchCopy);
1347 bool IsChainWithoutCalls =
1353 TRI->getSpillAlign(*RC));
1357 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->
hasSpilledVGPRs()
1360 if (SpillVGPRToAGPR) {
1365 bool SeenDbgInstr =
false;
1370 if (
MI.isDebugInstr())
1371 SeenDbgInstr =
true;
1373 if (
TII->isVGPRSpill(
MI)) {
1377 AMDGPU::OpName::vaddr);
1378 int FI =
MI.getOperand(FIOp).getIndex();
1380 TII->getNamedOperand(
MI, AMDGPU::OpName::vdata)->getReg();
1382 TRI->isAGPR(
MRI, VReg))) {
1386 TRI->eliminateFrameIndex(
MI, 0, FIOp, RS);
1393 NonVGPRSpillFIs.
set(FrameIndex);
1399 for (
unsigned FI : SpillFIs.
set_bits())
1400 if (!NonVGPRSpillFIs.
test(FI))
1412 if (!SpillFIs.
empty() && SeenDbgInstr) {
1417 if (
MI.isDebugValue() &&
MI.getOperand(0).isFI() &&
1419 SpillFIs[
MI.getOperand(0).getIndex()]) {
1420 MI.getOperand(0).ChangeToRegister(
Register(),
false );
1430 bool HaveSGPRToVMemSpill =
1433 "SGPR spill should have been removed in SILowerSGPRSpills");
1439 assert(RS &&
"RegScavenger required if spilling");
1446 if (HaveSGPRToVMemSpill &&
1460 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
1467 TRI->findUnusedRegister(
MRI, &AMDGPU::VGPR_32RegClass, MF);
1468 if (UnusedLowVGPR && (
TRI->getHWRegIndex(UnusedLowVGPR) <
1469 TRI->getHWRegIndex(VGPRForAGPRCopy))) {
1475 MRI.reserveReg(UnusedLowVGPR,
TRI);
1482 TRI->findUnusedRegister(
MRI, &AMDGPU::SGPR_64RegClass, MF);
1487 if (LongBranchReservedReg && UnusedLowSGPR) {
1489 MRI.reserveReg(UnusedLowSGPR,
TRI);
1497 bool NeedExecCopyReservedReg)
const {
1508 for (
unsigned I = 0; CSRegs[
I]; ++
I)
1514 if (NeedExecCopyReservedReg ||
1515 (ReservedRegForExecCopy &&
1516 MRI.isPhysRegUsed(ReservedRegForExecCopy,
true))) {
1517 MRI.reserveReg(ReservedRegForExecCopy,
TRI);
1519 if (UnusedScratchReg) {
1523 MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
1524 LiveUnits.
addReg(UnusedScratchReg);
1528 "Re-reserving spill slot for EXEC copy register");
1532 }
else if (ReservedRegForExecCopy) {
1546 const bool WillHaveFP =
1550 if (WillHaveFP ||
hasFP(MF)) {
1553 "Re-reserving spill slot for FP");
1557 if (
TRI->hasBasePointer(MF)) {
1560 "Re-reserving spill slot for BP");
1586 bool NeedExecCopyReservedReg =
false;
1599 if (
MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
1601 else if (
MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
1603 else if (
TII->isWWMRegSpillOpcode(
MI.getOpcode()))
1604 NeedExecCopyReservedReg =
true;
1605 else if (
MI.getOpcode() == AMDGPU::SI_RETURN ||
1606 MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
1608 TII->isChainCallOpcode(
MI.getOpcode()))) {
1611 (
count_if(
MI.operands(), [](
auto Op) { return Op.isReg(); }) ==
1623 SavedVGPRs.
reset(
Op.getReg());
1633 if (!ST.hasGFX90AInsts())
1641 SavedVGPRs.
reset(Reg.first);
1666 const BitVector AllSavedRegs = SavedRegs;
1675 const bool WillHaveFP =
1679 if (WillHaveFP ||
hasFP(MF))
1689 Register RetAddrReg =
TRI->getReturnAddressReg(MF);
1691 (FrameInfo.
hasCalls() ||
MRI.isPhysRegModified(RetAddrReg))) {
1692 SavedRegs.
set(
TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
1693 SavedRegs.
set(
TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
1699 std::vector<CalleeSavedInfo> &CSI)
const {
1707 Register BasePtrReg = RI->getBaseRegister();
1708 Register SGPRForFPSaveRestoreCopy =
1710 Register SGPRForBPSaveRestoreCopy =
1712 if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1715 unsigned NumModifiedRegs = 0;
1717 if (SGPRForFPSaveRestoreCopy)
1719 if (SGPRForBPSaveRestoreCopy)
1722 for (
auto &CS : CSI) {
1723 if (CS.getReg() == FramePtrReg && SGPRForFPSaveRestoreCopy) {
1724 CS.setDstReg(SGPRForFPSaveRestoreCopy);
1725 if (--NumModifiedRegs)
1727 }
else if (CS.getReg() == BasePtrReg && SGPRForBPSaveRestoreCopy) {
1728 CS.setDstReg(SGPRForBPSaveRestoreCopy);
1729 if (--NumModifiedRegs)
1743 uint64_t EstStackSize = MFI.estimateStackSize(MF);
1744 uint64_t MaxOffset = EstStackSize - 1;
1753 if (ST.enableFlatScratch()) {
1758 if (
TII->isLegalMUBUFImmOffset(MaxOffset))
1769 int64_t Amount =
I->getOperand(0).getImm();
1776 unsigned Opc =
I->getOpcode();
1777 bool IsDestroy = Opc ==
TII->getCallFrameDestroyOpcode();
1778 uint64_t CalleePopAmount = IsDestroy ?
I->getOperand(1).getImm() : 0;
1782 assert(isUInt<32>(Amount) &&
"exceeded stack address space size");
1792 Add->getOperand(3).setIsDead();
1793 }
else if (CalleePopAmount != 0) {
1848 "only expected to call this for entry points and chain functions");
unsigned const MachineRegisterInfo * MRI
static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB)
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
This file declares the machine register scavenger class.
static void buildEpilogRestore(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static cl::opt< bool > EnableSpillVGPRToAGPR("amdgpu-spill-vgpr-to-agpr", cl::desc("Enable spilling VGPRs to AGPRs"), cl::ReallyHidden, cl::init(true))
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR, const TargetRegisterClass &RC=AMDGPU::SReg_32_XM0_XEXECRegClass, bool IncludeScratchCopy=true)
Query target location for spilling SGPRs IncludeScratchCopy : Also look for free scratch SGPRs.
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, const SIInstrInfo *TII, Register TargetReg)
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsProlog, bool EnableInactiveLanes)
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI)
Returns true if the frame will require a reference to the stack pointer.
static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI, const SIMachineFunctionInfo *FuncInfo, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
static bool allSGPRSpillsAreDead(const MachineFunction &MF)
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits, const TargetRegisterClass &RC, bool Unused=false)
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, const LiveRegUnits &LiveUnits, const TargetRegisterClass &RC)
static unsigned getScratchScaleFactor(const GCNSubtarget &ST)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isChainFunction() const
bool isEntryFunction() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
bool test(unsigned Idx) const
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
bool any() const
any - Returns true if any bit is set.
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
iterator_range< const_set_bits_iterator > set_bits() const
bool empty() const
empty - Tests whether there are no bits in this bitvector.
This class represents an Operation in the Expression.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
bool hasImplicitBufferPtr() const
bool hasFlatScratchInit() const
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
If the specified machine instruction is a direct store to a stack slot, return the virtual or physica...
A set of register units used to track register liveness.
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
void addReg(MCPhysReg Reg)
Adds register units covered by physical register Reg.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
bool empty() const
Returns true if the set is empty.
void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
void removeReg(MCPhysReg Reg)
Removes all register units covered by physical register Reg.
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasTailCall() const
Returns true if the function contains a tail call.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
iterator_range< mop_iterator > operands()
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
void setIsDead(bool Val=true)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
PrologEpilogSGPRSpillBuilder(Register Reg, const PrologEpilogSGPRSaveRestoreInfo SI, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, const SIInstrInfo *TII, const SIRegisterInfo &TRI, LiveRegUnits &LiveUnits, Register FrameReg)
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
void backward()
Update internal register state and move MBB iterator backwards.
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Wrapper class representing virtual and physical registers.
void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs, bool NeedExecCopyReservedReg) const
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
bool allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction &MF) const override
Control the placement of special register scavenging spill slots when allocating a stack frame.
bool requiresStackPointerReference(const MachineFunction &MF) const
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
bool isSupportedStackID(TargetStackID::Value ID) const override
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
ArrayRef< PrologEpilogSGPRSpill > getPrologEpilogSGPRSpills() const
const WWMSpillsMap & getWWMSpills() const
void getAllScratchSGPRCopyDstRegs(SmallVectorImpl< Register > &Regs) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
void setSGPRForEXECCopy(Register Reg)
unsigned getNumPreloadedSGPRs() const
GCNUserSGPRUsageInfo & getUserSGPRInfo()
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
Register getLongBranchReservedReg() const
bool hasSpilledVGPRs() const
void setVGPRToAGPRSpillDead(int FrameIndex)
Register getStackPtrOffsetReg() const
bool isStackRealigned() const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
bool hasPrologEpilogSGPRSpillEntry(Register Reg) const
Register getGITPtrLoReg(const MachineFunction &MF) const
void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy)
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
Register getSGPRForEXECCopy() const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
void shiftSpillPhysVGPRsToLowestRange(MachineFunction &MF)
Register getVGPRForAGPRCopy() const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
Register getFrameOffsetReg() const
void setLongBranchReservedReg(Register Reg)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
const ReservedRegSet & getWWMReservedRegs() const
Register getImplicitBufferPtrUserSGPR() const
const PrologEpilogSGPRSaveRestoreInfo & getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const
void setIsStackRealigned(bool Realigned=true)
unsigned getGITPtrHigh() const
bool hasSpilledSGPRs() const
void addToPrologEpilogSGPRSpills(Register Reg, PrologEpilogSGPRSaveRestoreInfo SI)
Register getScratchSGPRCopyDstReg(Register Reg) const
void setScratchRSrcReg(Register Reg)
Register getFrameRegister(const MachineFunction &MF) const override
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
int64_t getFixed() const
Returns the fixed component of the stack.
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isCompute(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
@ And
Bitwise or logical AND of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
@ PRIVATE_SEGMENT_WAVE_BYTE_OFFSET
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.