LLVM 23.0.0git
SILowerSGPRSpills.cpp
Go to the documentation of this file.
1//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
10// SGPR spills, so must insert CSR SGPR spills as well as expand them.
11//
12// This pass must never create new SGPR virtual registers.
13//
14// FIXME: Must stop RegScavenger spills in later passes.
15//
16//===----------------------------------------------------------------------===//
17
18#include "SILowerSGPRSpills.h"
19#include "AMDGPU.h"
20#include "GCNSubtarget.h"
23#include "SISpillUtils.h"
30
31using namespace llvm;
32
33#define DEBUG_TYPE "si-lower-sgpr-spills"
34
36
37namespace {
38
39/// Insertion point for IMPLICIT_DEF: iterator may be MBB::end() and can't be
40/// dereferenced so the parent block is stored explicitly.
41struct LaneVGPRInsertPt {
44};
45
46static LaneVGPRInsertPt insertPt(MachineBasicBlock *MBB,
48 return {MBB, It};
49}
50
51static cl::opt<unsigned> MaxNumVGPRsForWwmAllocation(
52 "amdgpu-num-vgprs-for-wwm-alloc",
53 cl::desc("Max num VGPRs for whole-wave register allocation."),
55
56class SILowerSGPRSpills {
57private:
58 const SIRegisterInfo *TRI = nullptr;
59 const SIInstrInfo *TII = nullptr;
60 LiveIntervals *LIS = nullptr;
61 SlotIndexes *Indexes = nullptr;
62 MachineDominatorTree *MDT = nullptr;
63 MachineCycleInfo *MCI = nullptr;
64
65 // Save and Restore blocks of the current function. Typically there is a
66 // single save block, unless Windows EH funclets are involved.
67 MBBVector SaveBlocks;
68 MBBVector RestoreBlocks;
69
70 MachineBasicBlock *getCycleDomBB(MachineCycle *C);
71
72public:
73 SILowerSGPRSpills(LiveIntervals *LIS, SlotIndexes *Indexes,
75 : LIS(LIS), Indexes(Indexes), MDT(MDT), MCI(MCI) {}
76 bool run(MachineFunction &MF);
77 void calculateSaveRestoreBlocks(MachineFunction &MF);
78 bool spillCalleeSavedRegs(MachineFunction &MF,
79 SmallVectorImpl<int> &CalleeSavedFIs);
80 void updateLaneVGPRDomInstr(
82 DenseMap<Register, LaneVGPRInsertPt> &LaneVGPRDomInstr);
83 void determineRegsForWWMAllocation(MachineFunction &MF, BitVector &RegMask);
84};
85
86class SILowerSGPRSpillsLegacy : public MachineFunctionPass {
87public:
88 static char ID;
89
90 SILowerSGPRSpillsLegacy() : MachineFunctionPass(ID) {}
91
92 bool runOnMachineFunction(MachineFunction &MF) override;
93
94 void getAnalysisUsage(AnalysisUsage &AU) const override {
97 AU.setPreservesAll();
99 }
100
101 MachineFunctionProperties getClearedProperties() const override {
102 // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs.
103 return MachineFunctionProperties().setIsSSA().setNoVRegs();
104 }
105};
106
107} // end anonymous namespace
108
109char SILowerSGPRSpillsLegacy::ID = 0;
110
111INITIALIZE_PASS_BEGIN(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
112 "SI lower SGPR spill instructions", false, false)
117INITIALIZE_PASS_END(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
118 "SI lower SGPR spill instructions", false, false)
119
120char &llvm::SILowerSGPRSpillsLegacyID = SILowerSGPRSpillsLegacy::ID;
121
122/// Insert spill code for the callee-saved registers used in the function.
124 ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes,
125 LiveIntervals *LIS) {
126 const TargetFrameLowering *TFI = ST.getFrameLowering();
127 const TargetRegisterInfo *TRI = ST.getRegisterInfo();
128 MachineBasicBlock::iterator I = SaveBlock.begin();
129 MachineInstrSpan MIS(I, &SaveBlock);
130 bool Success = TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI);
131 assert(Success && "spillCalleeSavedRegisters should always succeed");
132 (void)Success;
133
134 // TFI doesn't update Indexes and LIS, so we have to do it separately.
135 if (Indexes)
136 Indexes->repairIndexesInRange(&SaveBlock, SaveBlock.begin(), I);
137
138 if (LIS)
139 for (const CalleeSavedInfo &CS : CSI)
140 LIS->removeAllRegUnitsForPhysReg(CS.getReg());
141}
142
143/// Insert restore code for the callee-saved registers used in the function.
144static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
146 SlotIndexes *Indexes, LiveIntervals *LIS) {
147 MachineFunction &MF = *RestoreBlock.getParent();
151 // Restore all registers immediately before the return and any
152 // terminators that precede it.
154 const MachineBasicBlock::iterator BeforeRestoresI =
155 I == RestoreBlock.begin() ? I : std::prev(I);
156
157 // FIXME: Just emit the readlane/writelane directly
158 if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
159 for (const CalleeSavedInfo &CI : reverse(CSI)) {
160 // Insert in reverse order. loadRegFromStackSlot can insert
161 // multiple instructions.
162 TFI->restoreCalleeSavedRegister(RestoreBlock, I, CI, &TII, TRI);
163
164 if (Indexes) {
165 MachineInstr &Inst = *std::prev(I);
166 Indexes->insertMachineInstrInMaps(Inst);
167 }
168
169 if (LIS)
170 LIS->removeAllRegUnitsForPhysReg(CI.getReg());
171 }
172 } else {
173 // TFI doesn't update Indexes and LIS, so we have to do it separately.
174 if (Indexes)
175 Indexes->repairIndexesInRange(&RestoreBlock, BeforeRestoresI,
176 RestoreBlock.getFirstTerminator());
177
178 if (LIS)
179 for (const CalleeSavedInfo &CS : CSI)
180 LIS->removeAllRegUnitsForPhysReg(CS.getReg());
181 }
182}
183
184/// Compute the sets of entry and return blocks for saving and restoring
185/// callee-saved registers, and placing prolog and epilog code.
186void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
187 const MachineFrameInfo &MFI = MF.getFrameInfo();
188
189 // Even when we do not change any CSR, we still want to insert the
190 // prologue and epilogue of the function.
191 // So set the save points for those.
192
193 // Use the points found by shrink-wrapping, if any.
194 if (!MFI.getSavePoints().empty()) {
195 assert(MFI.getSavePoints().size() == 1 &&
196 "Multiple save points not yet supported!");
197 const auto &SavePoint = *MFI.getSavePoints().begin();
198 SaveBlocks.push_back(SavePoint.first);
199 assert(MFI.getRestorePoints().size() == 1 &&
200 "Multiple restore points not yet supported!");
201 const auto &RestorePoint = *MFI.getRestorePoints().begin();
202 MachineBasicBlock *RestoreBlock = RestorePoint.first;
203 // If RestoreBlock does not have any successor and is not a return block
204 // then the end point is unreachable and we do not need to insert any
205 // epilogue.
206 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
207 RestoreBlocks.push_back(RestoreBlock);
208 return;
209 }
210
211 // Save refs to entry and return blocks.
212 SaveBlocks.push_back(&MF.front());
213 for (MachineBasicBlock &MBB : MF) {
214 if (MBB.isEHFuncletEntry())
215 SaveBlocks.push_back(&MBB);
216 if (MBB.isReturnBlock())
217 RestoreBlocks.push_back(&MBB);
218 }
219}
220
221// TODO: To support shrink wrapping, this would need to copy
222// PrologEpilogInserter's updateLiveness.
224 MachineBasicBlock &EntryBB = MF.front();
225
226 for (const CalleeSavedInfo &CSIReg : CSI)
227 EntryBB.addLiveIn(CSIReg.getReg());
228 EntryBB.sortUniqueLiveIns();
229}
230
231bool SILowerSGPRSpills::spillCalleeSavedRegs(
232 MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) {
233 MachineRegisterInfo &MRI = MF.getRegInfo();
234 const Function &F = MF.getFunction();
235 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
236 const SIFrameLowering *TFI = ST.getFrameLowering();
237 MachineFrameInfo &MFI = MF.getFrameInfo();
238 RegScavenger *RS = nullptr;
239
240 // Determine which of the registers in the callee save list should be saved.
241 BitVector SavedRegs;
242 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
243
244 // Add the code to save and restore the callee saved registers.
245 if (!F.hasFnAttribute(Attribute::Naked)) {
246 // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
247 // necessary for verifier liveness checks.
248 MFI.setCalleeSavedInfoValid(true);
249
250 std::vector<CalleeSavedInfo> CSI;
251 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
252 MCRegister RetAddrReg = TRI->getReturnAddressReg(MF);
253 MCRegister RetAddrRegSub0 = TRI->getSubReg(RetAddrReg, AMDGPU::sub0);
254 MCRegister RetAddrRegSub1 = TRI->getSubReg(RetAddrReg, AMDGPU::sub1);
255 bool SpillRetAddrReg = false;
256
257 for (unsigned I = 0; CSRegs[I]; ++I) {
258 MCRegister Reg = CSRegs[I];
259
260 if (SavedRegs.test(Reg)) {
261 if (Reg == RetAddrRegSub0 || Reg == RetAddrRegSub1) {
262 SpillRetAddrReg = true;
263 continue;
264 }
265
266 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
267 int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
268 TRI->getSpillAlign(*RC), true,
269 nullptr, TRI->getSpillStackID(*RC));
270
271 CSI.emplace_back(Reg, JunkFI);
272 CalleeSavedFIs.push_back(JunkFI);
273 }
274 }
275
276 // Return address uses a register pair. Add the super register to the
277 // CSI list so that it's easier to identify the entire spill and CFI
278 // can be emitted appropriately.
279 if (SpillRetAddrReg) {
280 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(RetAddrReg);
281 int JunkFI =
282 MFI.CreateStackObject(TRI->getSpillSize(*RC), TRI->getSpillAlign(*RC),
283 true, nullptr, TRI->getSpillStackID(*RC));
284 CSI.push_back(CalleeSavedInfo(RetAddrReg, JunkFI));
285 CalleeSavedFIs.push_back(JunkFI);
286 }
287
288 if (!CSI.empty()) {
289 for (MachineBasicBlock *SaveBlock : SaveBlocks)
290 insertCSRSaves(ST, *SaveBlock, CSI, Indexes, LIS);
291
292 // Add live ins to save blocks.
293 assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
294 updateLiveness(MF, CSI);
295
296 for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
297 insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS);
298 return true;
299 }
300 }
301
302 return false;
303}
304
305MachineBasicBlock *SILowerSGPRSpills::getCycleDomBB(MachineCycle *C) {
306 // If the insertion point lands on a cycle entry, move it to a block that
307 // dominates all entries.
308 if (C->isReducible()) {
309 if (auto *IDom = MDT->getNode(C->getHeader())->getIDom())
310 return IDom->getBlock();
311 llvm_unreachable("Expected cycle to have an IDom.");
312 return nullptr;
313 }
314
315 const SmallVectorImpl<MachineBasicBlock *> &Entries = C->getEntries();
316 assert(!Entries.empty() && "Expected cycle to have at least one entry.");
317 MachineBasicBlock *EntryBB = Entries[0];
318 for (unsigned I = 1; I < Entries.size(); ++I)
319 EntryBB = MDT->findNearestCommonDominator(EntryBB, Entries[I]);
320 return EntryBB;
321}
322
323void SILowerSGPRSpills::updateLaneVGPRDomInstr(
324 int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
325 DenseMap<Register, LaneVGPRInsertPt> &LaneVGPRDomInstr) {
326 // For the Def of a virtual LaneVGPR to dominate all its uses, we should
327 // insert an IMPLICIT_DEF before the dominating spill. Switching to a
328 // depth-first order doesn't really help since the machine function can have
329 // unstructured control flow post-SSA. Hence, for each virtual register, find
330 // the common dominator to get either the dominating spill or a block
331 // dominating all spills.
332 SIMachineFunctionInfo *FuncInfo =
333 MBB->getParent()->getInfo<SIMachineFunctionInfo>();
335 FuncInfo->getSGPRSpillToVirtualVGPRLanes(FI);
336 Register PrevLaneVGPR;
337 for (auto &Spill : VGPRSpills) {
338 if (PrevLaneVGPR == Spill.VGPR)
339 continue;
340
341 PrevLaneVGPR = Spill.VGPR;
342 auto I = LaneVGPRDomInstr.find(Spill.VGPR);
343 if (Spill.Lane == 0 && I == LaneVGPRDomInstr.end()) {
344 LaneVGPRDomInstr[Spill.VGPR] = insertPt(MBB, InsertPt);
345 } else {
346 assert(I != LaneVGPRDomInstr.end());
347 LaneVGPRInsertPt Prev = I->second;
348 MachineBasicBlock *PrevInsertMBB = Prev.MBB;
349 MachineBasicBlock::iterator PrevInsertPt = Prev.It;
350 MachineBasicBlock *DomMBB = PrevInsertMBB;
351 if (DomMBB == MBB) {
352 // The insertion point was selected earlier in a predecessor block whose
353 // spills are currently being lowered. The earlier InsertPt would be
354 // the one just before the block terminator and it should be changed
355 // if we insert any new spill in it.
356 if (PrevInsertPt == MBB->end() ||
357 MDT->dominates(&*InsertPt, &*PrevInsertPt))
358 I->second = insertPt(MBB, InsertPt);
359
360 continue;
361 }
362
363 // Find the common dominator block between PrevInsertPt and the
364 // current spill.
365 DomMBB = MDT->findNearestCommonDominator(DomMBB, MBB);
366
367 if (DomMBB == MBB)
368 I->second = insertPt(MBB, InsertPt);
369 else if (DomMBB != PrevInsertMBB)
370 I->second = insertPt(DomMBB, DomMBB->getFirstTerminator());
371 }
372 }
373}
374
375void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
376 BitVector &RegMask) {
377 // Determine an optimal number of VGPRs for WWM allocation. The complement
378 // list will be available for allocating other VGPR virtual registers.
379 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
380 MachineRegisterInfo &MRI = MF.getRegInfo();
381 BitVector ReservedRegs = TRI->getReservedRegs(MF);
382 BitVector NonWwmAllocMask(TRI->getNumRegs());
383 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
384
385 // FIXME: MaxNumVGPRsForWwmAllocation might need to be adjusted in the future
386 // to have a balanced allocation between WWM values and per-thread vector
387 // register operands.
388 unsigned NumRegs = MaxNumVGPRsForWwmAllocation;
389 NumRegs =
390 std::min(static_cast<unsigned>(MFI->getSGPRSpillVGPRs().size()), NumRegs);
391
392 auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(MF.getFunction());
393 // Try to use the highest available registers for now. Later after
394 // vgpr-regalloc, they can be shifted to the lowest range.
395 unsigned I = 0;
396 for (unsigned Reg = AMDGPU::VGPR0 + MaxNumVGPRs - 1;
397 (I < NumRegs) && (Reg >= AMDGPU::VGPR0); --Reg) {
398 if (!ReservedRegs.test(Reg) &&
399 !MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/true)) {
400 TRI->markSuperRegs(RegMask, Reg);
401 ++I;
402 }
403 }
404
405 if (I != NumRegs) {
406 // Reserve an arbitrary register and report the error.
407 TRI->markSuperRegs(RegMask, AMDGPU::VGPR0);
409 "cannot find enough VGPRs for wwm-regalloc");
410 }
411}
412
413bool SILowerSGPRSpillsLegacy::runOnMachineFunction(MachineFunction &MF) {
414 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
415 LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
416 auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
417 SlotIndexes *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
418 MachineDominatorTree *MDT =
419 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
420 MachineCycleInfo *MCI =
421 &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
422 return SILowerSGPRSpills(LIS, Indexes, MDT, MCI).run(MF);
423}
424
425bool SILowerSGPRSpills::run(MachineFunction &MF) {
426 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
427 TII = ST.getInstrInfo();
428 TRI = &TII->getRegisterInfo();
429
430 assert(SaveBlocks.empty() && RestoreBlocks.empty());
431
432 // First, expose any CSR SGPR spills. This is mostly the same as what PEI
433 // does, but somewhat simpler.
434 calculateSaveRestoreBlocks(MF);
435 SmallVector<int> CalleeSavedFIs;
436 bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);
437
438 MachineFrameInfo &MFI = MF.getFrameInfo();
439 MachineRegisterInfo &MRI = MF.getRegInfo();
440 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
441
442 if (!MFI.hasStackObjects() && !HasCSRs) {
443 SaveBlocks.clear();
444 RestoreBlocks.clear();
445 return false;
446 }
447
448 bool MadeChange = false;
449 bool SpilledToVirtVGPRLanes = false;
450
451 // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
452 // handled as SpilledToReg in regular PrologEpilogInserter.
453 const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
454 (HasCSRs || FuncInfo->hasSpilledSGPRs());
455 if (HasSGPRSpillToVGPR) {
456 // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
457 // are spilled to VGPRs, in which case we can eliminate the stack usage.
458 //
459 // This operates under the assumption that only other SGPR spills are users
460 // of the frame index.
461
462 // To track the spill frame indices handled in this pass.
463 BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
464
465 // To track the IMPLICIT_DEF insertion point for the lane vgprs.
466 DenseMap<Register, LaneVGPRInsertPt> LaneVGPRDomInstr;
467
468 for (MachineBasicBlock &MBB : MF) {
469 for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
470 if (!TII->isSGPRSpill(MI))
471 continue;
472
473 if (MI.getOperand(0).isUndef()) {
474 if (Indexes)
476 MI.eraseFromParent();
477 continue;
478 }
479
480 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
482
483 bool IsCalleeSaveSGPRSpill = llvm::is_contained(CalleeSavedFIs, FI);
484 if (IsCalleeSaveSGPRSpill) {
485 // Spill callee-saved SGPRs into physical VGPR lanes.
486
487 // TODO: This is to ensure the CFIs are static for efficient frame
488 // unwinding in the debugger. Spilling them into virtual VGPR lanes
489 // involves regalloc allocating the physical VGPRs, which might
490 // cause intermediate spills/splits of such live ranges for successful
491 // allocation. This would result in broken CFI encoding unless
492 // regalloc-aware CFI generation, which inserts new CFIs along with the
493 // intermediate spills, is implemented. No such support currently
494 // exists in the LLVM compiler.
495 if (FuncInfo->allocateSGPRSpillToVGPRLane(
496 MF, FI, /*SpillToPhysVGPRLane=*/true)) {
497 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
498 MI, FI, nullptr, Indexes, LIS, true);
499 if (!Spilled)
501 "failed to spill SGPR to physical VGPR lane when allocated");
502 }
503 } else {
504 MachineInstrSpan MIS(&MI, &MBB);
505 if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
506 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
507 MI, FI, nullptr, Indexes, LIS);
508 if (!Spilled)
510 "failed to spill SGPR to virtual VGPR lane when allocated");
511 SpillFIs.set(FI);
512 updateLaneVGPRDomInstr(FI, &MBB, MIS.begin(), LaneVGPRDomInstr);
513 SpilledToVirtVGPRLanes = true;
514 }
515 }
516 }
517 }
518
519 for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) {
520 LaneVGPRInsertPt IP = LaneVGPRDomInstr[Reg];
521 if (MachineCycle *C = MCI->getTopLevelParentCycle(IP.MBB)) {
522 MachineBasicBlock *AdjMBB = getCycleDomBB(C);
523 IP = insertPt(AdjMBB, AdjMBB->getFirstTerminator());
524 }
525 // Insert the IMPLICIT_DEF at the identified points.
526 MachineBasicBlock &Block = *IP.MBB;
527 DebugLoc DL = Block.findDebugLoc(IP.It);
528 auto MIB = BuildMI(Block, IP.It, DL, TII->get(AMDGPU::IMPLICIT_DEF), Reg);
529
530 // Add WWM flag to the virtual register.
532
533 // Set SGPR_SPILL asm printer flag
534 MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
535 if (LIS) {
536 LIS->InsertMachineInstrInMaps(*MIB);
538 }
539 }
540
541 // Determine the registers for WWM allocation and also compute the register
542 // mask for non-wwm VGPR allocation.
543 if (FuncInfo->getSGPRSpillVGPRs().size()) {
544 BitVector WwmRegMask(TRI->getNumRegs());
545
546 determineRegsForWWMAllocation(MF, WwmRegMask);
547
548 BitVector NonWwmRegMask(WwmRegMask);
549 NonWwmRegMask.flip().clearBitsNotInMask(TRI->getAllVGPRRegMask());
550
551 // The complement set will be the registers for non-wwm (per-thread) vgpr
552 // allocation.
553 FuncInfo->updateNonWWMRegMask(NonWwmRegMask);
554 }
555
556 for (MachineBasicBlock &MBB : MF)
557 clearDebugInfoForSpillFIs(MFI, MBB, SpillFIs);
558
559 // All those frame indices which are dead by now should be removed from the
560 // function frame. Otherwise, there is a side effect such as re-mapping of
561 // free frame index ids by the later pass(es) like "stack slot coloring"
562 // which in turn could mess-up with the book keeping of "frame index to VGPR
563 // lane".
564 FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);
565
566 MadeChange = true;
567 }
568
569 if (SpilledToVirtVGPRLanes) {
570 const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
571 // Shift back the reserved SGPR for EXEC copy into the lowest range.
572 // This SGPR is reserved to handle the whole-wave spill/copy operations
573 // that might get inserted during vgpr regalloc.
574 Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF);
575 if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) <
576 TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy()))
577 FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR);
578 } else {
579 // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM
580 // spills/copies. Reset the SGPR reserved for EXEC copy.
581 FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
582 }
583
584 SaveBlocks.clear();
585 RestoreBlocks.clear();
586
587 return MadeChange;
588}
589
590PreservedAnalyses
593 MFPropsModifier _(*this, MF);
594 auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
595 auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(MF);
598 SILowerSGPRSpills(LIS, Indexes, MDT, &MCI).run(MF);
599 return PreservedAnalyses::all();
600}
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static void insertCSRRestores(MachineBasicBlock &RestoreBlock, std::vector< CalleeSavedInfo > &CSI)
Insert restore code for the callee-saved registers used in the function.
SmallVector< MachineBasicBlock *, 4 > MBBVector
static void insertCSRSaves(MachineBasicBlock &SaveBlock, ArrayRef< CalleeSavedInfo > CSI)
Insert spill code for the callee-saved registers used in the function.
static void updateLiveness(MachineFunction &MF)
Helper function to update the liveness information for the callee-saved registers.
This file declares the machine register scavenger class.
static void insertCSRRestores(MachineBasicBlock &RestoreBlock, MutableArrayRef< CalleeSavedInfo > CSI, SlotIndexes *Indexes, LiveIntervals *LIS)
Insert restore code for the callee-saved registers used in the function.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool test(unsigned Idx) const
Returns true if bit Idx is set.
Definition BitVector.h:482
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
iterator end()
Definition DenseMap.h:143
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:354
CycleT * getTopLevelParentCycle(const BlockT *Block) const
const HexagonRegisterInfo & getRegisterInfo() const
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
An RAII based helper class to modify MachineFunctionProperties when running pass.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
LLVM_ABI void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Legacy analysis pass which computes a MachineCycleInfo.
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setCalleeSavedInfoValid(bool v)
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
const SaveRestorePoints & getRestorePoints() const
const SaveRestorePoints & getSavePoints() const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
MachineInstrSpan provides an interface to get an iteration range containing the instruction it was in...
Representation of each machine instruction.
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
void setFlag(Register Reg, uint8_t Flag)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
void updateNonWWMRegMask(BitVector &RegMask)
ArrayRef< Register > getSGPRSpillVGPRs() const
SlotIndexes pass.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
LLVM_ABI void removeMachineInstrFromMaps(MachineInstr &MI, bool AllowBundled=false)
Removes machine instruction (bundle) MI from the mapping.
LLVM_ABI void repairIndexesInRange(MachineBasicBlock *MBB, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End)
Repair indexes after adding and removing instructions.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Information about stack frame layout on the target.
void restoreCalleeSavedRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
void clearDebugInfoForSpillFIs(MachineFrameInfo &MFI, MachineBasicBlock &MBB, const BitVector &SpillFIs)
Replace frame index operands with null registers in debug value instructions for the specified spill ...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
char & SILowerSGPRSpillsLegacyID
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
MachineCycleInfo::CycleT MachineCycle