LLVM 17.0.0git
SILowerSGPRSpills.cpp
Go to the documentation of this file.
1//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
10// SGPR spills, so must insert CSR SGPR spills as well as expand them.
11//
12// This pass must never create new SGPR virtual registers.
13//
14// FIXME: Must stop RegScavenger spills in later passes.
15//
16//===----------------------------------------------------------------------===//
17
18#include "AMDGPU.h"
19#include "GCNSubtarget.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "si-lower-sgpr-spills"
30
32
33namespace {
34
35class SILowerSGPRSpills : public MachineFunctionPass {
36private:
37 const SIRegisterInfo *TRI = nullptr;
38 const SIInstrInfo *TII = nullptr;
39 LiveIntervals *LIS = nullptr;
40 SlotIndexes *Indexes = nullptr;
41
42 // Save and Restore blocks of the current function. Typically there is a
43 // single save block, unless Windows EH funclets are involved.
44 MBBVector SaveBlocks;
45 MBBVector RestoreBlocks;
46
47public:
48 static char ID;
49
50 SILowerSGPRSpills() : MachineFunctionPass(ID) {}
51
52 void calculateSaveRestoreBlocks(MachineFunction &MF);
53 bool spillCalleeSavedRegs(MachineFunction &MF);
54
55 bool runOnMachineFunction(MachineFunction &MF) override;
56
57 void getAnalysisUsage(AnalysisUsage &AU) const override {
58 AU.setPreservesAll();
60 }
61};
62
63} // end anonymous namespace
64
// Storage for the static pass identifier that the constructor passes to
// MachineFunctionPass.
65char SILowerSGPRSpills::ID = 0;
66
68 "SI lower SGPR spill instructions", false, false)
72 "SI lower SGPR spill instructions", false, false)
73
// Publish the pass identifier in the llvm namespace as a reference to the
// class-static ID defined above.
74char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
75
76/// Insert spill code for the callee-saved registers used in the function.
///
/// Saves are emitted at the beginning of \p SaveBlock. The frame lowering's
/// spillCalleeSavedRegisters hook is tried first; only when it returns false
/// is every entry of CSI stored to its frame index one by one.
/// When \p Indexes is non-null each inserted store is added to the slot index
/// maps, and when \p LIS is non-null the live ranges of the spilled
/// register's units are removed.
77static void insertCSRSaves(MachineBasicBlock &SaveBlock,
79 LiveIntervals *LIS) {
80 MachineFunction &MF = *SaveBlock.getParent();
84 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
85 const SIRegisterInfo *RI = ST.getRegisterInfo();
86
    // All saves go in front of the first instruction of the save block.
87 MachineBasicBlock::iterator I = SaveBlock.begin();
88 if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
90
91 for (const CalleeSavedInfo &CS : CSI) {
92 // Insert the spill to the stack frame.
93 MCRegister Reg = CS.getReg();
94
    // MIS is created before the store so that MIS.begin() can be compared
    // with I after the store has been inserted.
95 MachineInstrSpan MIS(I, &SaveBlock);
    // The return address register is looked up with a 64-bit type, every
    // other register with a 32-bit type.
96 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
97 Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
98
99 // If this value was already livein, we probably have a direct use of the
100 // incoming register value, so don't kill at the spill point. This happens
101 // since we pass some special inputs (workgroup IDs) in the callee saved
102 // range.
103 const bool IsLiveIn = MRI.isLiveIn(Reg);
104 TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
105 RC, TRI, Register());
106
    // The store is expected to be exactly one instruction, sitting directly
    // in front of I; register it with the slot index maps.
107 if (Indexes) {
108 assert(std::distance(MIS.begin(), I) == 1);
109 MachineInstr &Inst = *std::prev(I);
110 Indexes->insertMachineInstrInMaps(Inst);
111 }
112
    // Remove the live ranges of Reg's register units.
113 if (LIS)
114 LIS->removeAllRegUnitsForPhysReg(Reg);
115 }
116 }
117}
118
119/// Insert restore code for the callee-saved registers used in the function.
///
/// The frame lowering's restoreCalleeSavedRegisters hook is tried first; only
/// when it returns false is every entry of CSI reloaded from its frame index,
/// walking CSI in reverse order.
/// When \p Indexes is non-null the instruction directly in front of the
/// insertion point is added to the slot index maps after each reload.
120static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
122 SlotIndexes *Indexes, LiveIntervals *LIS) {
123 MachineFunction &MF = *RestoreBlock.getParent();
127 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
128 const SIRegisterInfo *RI = ST.getRegisterInfo();
129 // Restore all registers immediately before the return and any
130 // terminators that precede it.
132
133 // FIXME: Just emit the readlane/writelane directly
134 if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
135 for (const CalleeSavedInfo &CI : reverse(CSI)) {
136 Register Reg = CI.getReg();
    // The return address register is looked up with a 64-bit type, every
    // other register with a 32-bit type.
137 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
138 Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
139
140 TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI,
141 Register());
142 assert(I != RestoreBlock.begin() &&
143 "loadRegFromStackSlot didn't insert any code!");
144 // Insert in reverse order. loadRegFromStackSlot can insert
145 // multiple instructions.
146
    // NOTE(review): only the single instruction directly before I is
    // registered here, although the comment above says the reload may
    // consist of several instructions -- confirm this is sufficient.
147 if (Indexes) {
148 MachineInstr &Inst = *std::prev(I);
149 Indexes->insertMachineInstrInMaps(Inst);
150 }
151
152 if (LIS)
154 }
155 }
156}
157
158/// Compute the sets of entry and return blocks for saving and restoring
159/// callee-saved registers, and placing prolog and epilog code.
160void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
161 const MachineFrameInfo &MFI = MF.getFrameInfo();
162
163 // Even when we do not change any CSR, we still want to insert the
164 // prologue and epilogue of the function.
165 // So set the save points for those.
166
167 // Use the points found by shrink-wrapping, if any.
168 if (MFI.getSavePoint()) {
169 SaveBlocks.push_back(MFI.getSavePoint());
170 assert(MFI.getRestorePoint() && "Both restore and save must be set");
171 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
172 // If RestoreBlock does not have any successor and is not a return block
173 // then the end point is unreachable and we do not need to insert any
174 // epilogue.
175 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
176 RestoreBlocks.push_back(RestoreBlock);
177 return;
178 }
179
180 // Save refs to entry and return blocks.
181 SaveBlocks.push_back(&MF.front());
182 for (MachineBasicBlock &MBB : MF) {
183 if (MBB.isEHFuncletEntry())
184 SaveBlocks.push_back(&MBB);
185 if (MBB.isReturnBlock())
186 RestoreBlocks.push_back(&MBB);
187 }
188}
189
190// TODO: To support shrink wrapping, this would need to copy
191// PrologEpilogInserter's updateLiveness.
193 MachineBasicBlock &EntryBB = MF.front();
194
195 for (const CalleeSavedInfo &CSIReg : CSI)
196 EntryBB.addLiveIn(CSIReg.getReg());
197 EntryBB.sortUniqueLiveIns();
198}
199
/// Create spill slots for the callee-saved registers selected by
/// determineCalleeSavesSGPR and insert their saves and restores into the
/// previously computed SaveBlocks / RestoreBlocks.
///
/// \returns true if at least one callee-saved register was given a spill
/// slot (and save/restore code was inserted); false otherwise, which
/// includes every function carrying the Naked attribute.
200bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
202 const Function &F = MF.getFunction();
204 const SIFrameLowering *TFI = ST.getFrameLowering();
205 MachineFrameInfo &MFI = MF.getFrameInfo();
206 RegScavenger *RS = nullptr;
207
208 // Determine which of the registers in the callee save list should be saved.
209 BitVector SavedRegs;
210 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
211
212 // Add the code to save and restore the callee saved registers.
213 if (!F.hasFnAttribute(Attribute::Naked)) {
214 // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
215 // necessary for verifier liveness checks.
216 MFI.setCalleeSavedInfoValid(true);
217
218 std::vector<CalleeSavedInfo> CSI;
219 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
220
    // CSRegs is walked until its first zero entry.
221 for (unsigned I = 0; CSRegs[I]; ++I) {
222 MCRegister Reg = CSRegs[I];
223
224 if (SavedRegs.test(Reg)) {
225 const TargetRegisterClass *RC =
226 TRI->getMinimalPhysRegClass(Reg, MVT::i32);
    // Create a spill-slot stack object sized and aligned for RC; its frame
    // index is what the save/restore code is emitted against.
227 int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
228 TRI->getSpillAlign(*RC), true);
229
230 CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
231 }
232 }
233
234 if (!CSI.empty()) {
235 for (MachineBasicBlock *SaveBlock : SaveBlocks)
236 insertCSRSaves(*SaveBlock, CSI, Indexes, LIS);
237
238 // Add live ins to save blocks.
239 assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
240 updateLiveness(MF, CSI);
241
242 for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
243 insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS);
244 return true;
245 }
246 }
247
248 return false;
249}
250
/// Pass entry point.
///
/// First computes the save/restore blocks and inserts the callee-saved SGPR
/// saves and restores. Then, when the target spills SGPRs to VGPRs and there
/// is something to lower, every SGPR spill instruction whose frame index can
/// be given a VGPR lane is rewritten, debug values referring to those frame
/// indices are reset, and the dead frame indices are removed.
///
/// \returns true only when the SGPR-to-VGPR lowering section ran.
251bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
253 TII = ST.getInstrInfo();
254 TRI = &TII->getRegisterInfo();
255
    // Both analyses are optional; the pointers are null when unavailable.
256 LIS = getAnalysisIfAvailable<LiveIntervals>();
257 Indexes = getAnalysisIfAvailable<SlotIndexes>();
258
    // The block lists are cleared on every exit path below, so they must be
    // empty on entry.
259 assert(SaveBlocks.empty() && RestoreBlocks.empty());
260
261 // First, expose any CSR SGPR spills. This is mostly the same as what PEI
262 // does, but somewhat simpler.
263 calculateSaveRestoreBlocks(MF);
264 bool HasCSRs = spillCalleeSavedRegs(MF);
265
266 MachineFrameInfo &MFI = MF.getFrameInfo();
269
    // Nothing on the stack and no CSR spills: there is nothing to lower.
270 if (!MFI.hasStackObjects() && !HasCSRs) {
271 SaveBlocks.clear();
272 RestoreBlocks.clear();
273 return false;
274 }
275
    // NOTE(review): MadeChange stays false when HasCSRs is true but
    // HasSGPRSpillToVGPR is false, even though spillCalleeSavedRegs has
    // already inserted code -- confirm this is intended.
276 bool MadeChange = false;
277 bool NewReservedRegs = false;
278
279 // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
280 // handled as SpilledToReg in regular PrologEpilogInserter.
281 const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
282 (HasCSRs || FuncInfo->hasSpilledSGPRs());
283 if (HasSGPRSpillToVGPR) {
284 // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
285 // are spilled to VGPRs, in which case we can eliminate the stack usage.
286 //
287 // This operates under the assumption that only other SGPR spills are users
288 // of the frame index.
289
290 // To track the spill frame indices handled in this pass.
291 BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
292
293 for (MachineBasicBlock &MBB : MF) {
295 if (!TII->isSGPRSpill(MI))
296 continue;
297
298 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
    // Only spills whose frame index could be assigned a VGPR lane are
    // rewritten and recorded in SpillFIs.
300 if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
301 NewReservedRegs = true;
302 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
303 MI, FI, nullptr, Indexes, LIS);
    // Spilled is otherwise only read by the assert below; the void cast
    // keeps it referenced.
304 (void)Spilled;
305 assert(Spilled && "failed to spill SGPR to VGPR when allocated");
306 SpillFIs.set(FI);
307 }
308 }
309 }
310
311 // FIXME: Adding to live-ins is redundant with reserving registers.
312 for (MachineBasicBlock &MBB : MF) {
313 for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
314 MBB.addLiveIn(Reg);
316
317 // FIXME: The dead frame indices are replaced with a null register in
318 // the debug value instructions. We should instead update them with the
319 // correct register value. But it is not clear that the register value alone
320 // is adequate to lower the DIExpression. It should be worked out later.
321 for (MachineInstr &MI : MBB) {
322 if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
323 !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) &&
324 SpillFIs[MI.getOperand(0).getIndex()]) {
325 MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
326 }
327 }
328 }
329
330 // All those frame indices which are dead by now should be removed from the
331 // function frame. Otherwise, there is a side effect such as re-mapping of
332 // free frame index ids by later passes like "stack slot coloring", which
333 // in turn could mess up the bookkeeping of "frame index to VGPR
334 // lane".
335 FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);
336
337 MadeChange = true;
338 }
339
340 SaveBlocks.clear();
341 RestoreBlocks.clear();
342
343 // Update the reserved registers with any VGPRs added for SGPR spills.
344 if (NewReservedRegs)
345 MRI.freezeReservedRegs(MF);
346
347 return MadeChange;
348}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
Provides AMDGPU specific target descriptions.
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void updateLiveness(MachineFunction &MF, ArrayRef< CalleeSavedInfo > CSI)
static void insertCSRRestores(MachineBasicBlock &RestoreBlock, MutableArrayRef< CalleeSavedInfo > CSI, SlotIndexes *Indexes, LiveIntervals *LIS)
Insert restore code for the callee-saved registers used in the function.
SI lower SGPR spill instructions
static void insertCSRSaves(MachineBasicBlock &SaveBlock, ArrayRef< CalleeSavedInfo > CSI, SlotIndexes *Indexes, LiveIntervals *LIS)
Insert spill code for the callee-saved registers used in the function.
#define DEBUG_TYPE
Represent the analysis usage information of a pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
bool test(unsigned Idx) const
Definition: BitVector.h:454
The CalleeSavedInfo class tracks the information needed to locate where a callee saved register is in t...
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
void push_back(MachineInstr *MI)
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
MachineBasicBlock * getRestorePoint() const
void setCalleeSavedInfoValid(bool v)
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineBasicBlock * getSavePoint() const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
MachineInstrSpan provides an interface to get an iteration range containing the instruction it was in...
MachineBasicBlock::iterator begin()
Representation of each machine instruction.
Definition: MachineInstr.h:68
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:305
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool IsPrologEpilog=false)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
ArrayRef< Register > getSGPRSpillVGPRs() const
SlotIndexes pass.
Definition: SlotIndexes.h:319
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:540
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Information about stack frame layout on the target.
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:721
char & SILowerSGPRSpillsID
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:484