LLVM 23.0.0git
SILowerSGPRSpills.cpp
Go to the documentation of this file.
1//===-- SILowerSGPRSPills.cpp ---------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
10// SGPR spills, so must insert CSR SGPR spills as well as expand them.
11//
12// This pass must never create new SGPR virtual registers.
13//
14// FIXME: Must stop RegScavenger spills in later passes.
15//
16//===----------------------------------------------------------------------===//
17
18#include "SILowerSGPRSpills.h"
19#include "AMDGPU.h"
20#include "GCNSubtarget.h"
23#include "SISpillUtils.h"
30
31using namespace llvm;
32
33#define DEBUG_TYPE "si-lower-sgpr-spills"
34
36
37namespace {
38
39/// Insertion point for IMPLICIT_DEF: iterator may be MBB::end() and can't be
40/// dereferenced so the parent block is stored explicitly.
41struct LaneVGPRInsertPt {
44};
45
46static LaneVGPRInsertPt insertPt(MachineBasicBlock *MBB,
48 return {MBB, It};
49}
50
51static cl::opt<unsigned> MaxNumVGPRsForWwmAllocation(
52 "amdgpu-num-vgprs-for-wwm-alloc",
53 cl::desc("Max num VGPRs for whole-wave register allocation."),
55
class SILowerSGPRSpills {
private:
  // Target register/instruction info; initialized at the start of run().
  const SIRegisterInfo *TRI = nullptr;
  const SIInstrInfo *TII = nullptr;
  // Optional analyses: may be null; updated only when present.
  LiveIntervals *LIS = nullptr;
  SlotIndexes *Indexes = nullptr;
  // Analyses used to place IMPLICIT_DEFs for lane VGPRs.
  MachineDominatorTree *MDT = nullptr;
  MachineCycleInfo *MCI = nullptr;

  // Save and Restore blocks of the current function. Typically there is a
  // single save block, unless Windows EH funclets are involved.
  MBBVector SaveBlocks;
  MBBVector RestoreBlocks;

  // Returns a block dominating all entries of cycle \p C (see definition).
  MachineBasicBlock *getCycleDomBB(MachineCycle *C);

public:
  // NOTE(review): the constructor parameter list appears truncated in this
  // view (MDT/MCI parameters elided) -- confirm against the full source.
  SILowerSGPRSpills(LiveIntervals *LIS, SlotIndexes *Indexes,
      : LIS(LIS), Indexes(Indexes), MDT(MDT), MCI(MCI) {}
  // Main entry point; returns true if the function was modified.
  bool run(MachineFunction &MF);
  // Compute SaveBlocks/RestoreBlocks for the current function.
  void calculateSaveRestoreBlocks(MachineFunction &MF);
  // Insert CSR SGPR save/restore code; records the created stack objects in
  // \p CalleeSavedFIs. Returns true if any CSR spill code was inserted.
  bool spillCalleeSavedRegs(MachineFunction &MF,
                            SmallVectorImpl<int> &CalleeSavedFIs);
  // Track the dominating IMPLICIT_DEF insertion point for each lane VGPR.
  // NOTE(review): leading parameters of this declaration appear elided in
  // this view -- confirm against the full source.
  void updateLaneVGPRDomInstr(
      DenseMap<Register, LaneVGPRInsertPt> &LaneVGPRDomInstr);
  // Compute the set of physical VGPRs set aside for WWM allocation.
  void determineRegsForWWMAllocation(MachineFunction &MF, BitVector &RegMask);
};
85
class SILowerSGPRSpillsLegacy : public MachineFunctionPass {
public:
  static char ID;

  SILowerSGPRSpillsLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // NOTE(review): the addRequired<> calls for the dominator-tree and
    // cycle-info wrapper passes (used via getAnalysis in
    // runOnMachineFunction) appear elided in this view -- confirm against
    // the full source.
    AU.setPreservesAll();
  }

  MachineFunctionProperties getClearedProperties() const override {
    // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs.
    return MachineFunctionProperties().setIsSSA().setNoVRegs();
  }
};
106
107} // end anonymous namespace
108
109char SILowerSGPRSpillsLegacy::ID = 0;
110
111INITIALIZE_PASS_BEGIN(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
112 "SI lower SGPR spill instructions", false, false)
117INITIALIZE_PASS_END(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
118 "SI lower SGPR spill instructions", false, false)
119
120char &llvm::SILowerSGPRSpillsLegacyID = SILowerSGPRSpillsLegacy::ID;
121
124 for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) {
125 if (MBB.isLiveIn(*R)) {
126 return true;
127 }
128 }
129 return false;
130}
131
132/// Insert spill code for the callee-saved registers used in the function.
133static void insertCSRSaves(MachineBasicBlock &SaveBlock,
135 LiveIntervals *LIS) {
136 MachineFunction &MF = *SaveBlock.getParent();
139 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
140 const SIRegisterInfo *RI = ST.getRegisterInfo();
141
142 MachineBasicBlock::iterator I = SaveBlock.begin();
143 if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, RI)) {
144 for (const CalleeSavedInfo &CS : CSI) {
145 // Insert the spill to the stack frame.
146 MCRegister Reg = CS.getReg();
147
148 MachineInstrSpan MIS(I, &SaveBlock);
149 const TargetRegisterClass *RC = RI->getMinimalPhysRegClass(
150 Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
151
152 // If this value was already livein, we probably have a direct use of the
153 // incoming register value, so don't kill at the spill point. This happens
154 // since we pass some special inputs (workgroup IDs) in the callee saved
155 // range.
156 const bool IsLiveIn = isLiveIntoMBB(Reg, SaveBlock, RI);
157 TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
158 RC, Register());
159
160 if (Indexes) {
161 assert(std::distance(MIS.begin(), I) == 1);
162 MachineInstr &Inst = *std::prev(I);
163 Indexes->insertMachineInstrInMaps(Inst);
164 }
165
166 if (LIS)
168 }
169 } else {
170 // TFI doesn't update Indexes and LIS, so we have to do it separately.
171 if (Indexes)
172 Indexes->repairIndexesInRange(&SaveBlock, SaveBlock.begin(), I);
173
174 if (LIS)
175 for (const CalleeSavedInfo &CS : CSI)
176 LIS->removeAllRegUnitsForPhysReg(CS.getReg());
177 }
178}
179
180/// Insert restore code for the callee-saved registers used in the function.
/// Insert restore code for the callee-saved registers used in the function.
// NOTE(review): part of this function's parameter list (CSI), the local
// TII/TRI/TFI declarations, and the initialization of iterator 'I' appear
// elided in this view -- confirm against the full source.
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
                              SlotIndexes *Indexes, LiveIntervals *LIS) {
  MachineFunction &MF = *RestoreBlock.getParent();
  // Restore all registers immediately before the return and any
  // terminators that precede it.
  // Remember the instruction just before the restores so SlotIndexes can
  // later be repaired over exactly the inserted range.
  const MachineBasicBlock::iterator BeforeRestoresI =
      I == RestoreBlock.begin() ? I : std::prev(I);

  // FIXME: Just emit the readlane/writelane directly
  if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
    for (const CalleeSavedInfo &CI : reverse(CSI)) {
      // Insert in reverse order. loadRegFromStackSlot can insert
      // multiple instructions.
      TFI->restoreCalleeSavedRegister(RestoreBlock, I, CI, &TII, TRI);

      if (Indexes) {
        MachineInstr &Inst = *std::prev(I);
        Indexes->insertMachineInstrInMaps(Inst);
      }

      if (LIS)
        LIS->removeAllRegUnitsForPhysReg(CI.getReg());
    }
  } else {
    // TFI doesn't update Indexes and LIS, so we have to do it separately.
    if (Indexes)
      Indexes->repairIndexesInRange(&RestoreBlock, BeforeRestoresI,
                                    RestoreBlock.getFirstTerminator());

    if (LIS)
      for (const CalleeSavedInfo &CS : CSI)
        LIS->removeAllRegUnitsForPhysReg(CS.getReg());
  }
}
220
221/// Compute the sets of entry and return blocks for saving and restoring
222/// callee-saved registers, and placing prolog and epilog code.
223void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
224 const MachineFrameInfo &MFI = MF.getFrameInfo();
225
226 // Even when we do not change any CSR, we still want to insert the
227 // prologue and epilogue of the function.
228 // So set the save points for those.
229
230 // Use the points found by shrink-wrapping, if any.
231 if (!MFI.getSavePoints().empty()) {
232 assert(MFI.getSavePoints().size() == 1 &&
233 "Multiple save points not yet supported!");
234 const auto &SavePoint = *MFI.getSavePoints().begin();
235 SaveBlocks.push_back(SavePoint.first);
236 assert(MFI.getRestorePoints().size() == 1 &&
237 "Multiple restore points not yet supported!");
238 const auto &RestorePoint = *MFI.getRestorePoints().begin();
239 MachineBasicBlock *RestoreBlock = RestorePoint.first;
240 // If RestoreBlock does not have any successor and is not a return block
241 // then the end point is unreachable and we do not need to insert any
242 // epilogue.
243 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
244 RestoreBlocks.push_back(RestoreBlock);
245 return;
246 }
247
248 // Save refs to entry and return blocks.
249 SaveBlocks.push_back(&MF.front());
250 for (MachineBasicBlock &MBB : MF) {
251 if (MBB.isEHFuncletEntry())
252 SaveBlocks.push_back(&MBB);
253 if (MBB.isReturnBlock())
254 RestoreBlocks.push_back(&MBB);
255 }
256}
257
258// TODO: To support shrink wrapping, this would need to copy
259// PrologEpilogInserter's updateLiveness.
261 MachineBasicBlock &EntryBB = MF.front();
262
263 for (const CalleeSavedInfo &CSIReg : CSI)
264 EntryBB.addLiveIn(CSIReg.getReg());
265 EntryBB.sortUniqueLiveIns();
266}
267
268bool SILowerSGPRSpills::spillCalleeSavedRegs(
269 MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) {
270 MachineRegisterInfo &MRI = MF.getRegInfo();
271 const Function &F = MF.getFunction();
272 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
273 const SIFrameLowering *TFI = ST.getFrameLowering();
274 MachineFrameInfo &MFI = MF.getFrameInfo();
275 RegScavenger *RS = nullptr;
276
277 // Determine which of the registers in the callee save list should be saved.
278 BitVector SavedRegs;
279 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
280
281 // Add the code to save and restore the callee saved registers.
282 if (!F.hasFnAttribute(Attribute::Naked)) {
283 // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
284 // necessary for verifier liveness checks.
285 MFI.setCalleeSavedInfoValid(true);
286
287 std::vector<CalleeSavedInfo> CSI;
288 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
289
290 for (unsigned I = 0; CSRegs[I]; ++I) {
291 MCRegister Reg = CSRegs[I];
292
293 if (SavedRegs.test(Reg)) {
294 const TargetRegisterClass *RC =
295 TRI->getMinimalPhysRegClass(Reg, MVT::i32);
296 int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
297 TRI->getSpillAlign(*RC), true);
298
299 CSI.emplace_back(Reg, JunkFI);
300 CalleeSavedFIs.push_back(JunkFI);
301 }
302 }
303
304 if (!CSI.empty()) {
305 for (MachineBasicBlock *SaveBlock : SaveBlocks)
306 insertCSRSaves(*SaveBlock, CSI, Indexes, LIS);
307
308 // Add live ins to save blocks.
309 assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
310 updateLiveness(MF, CSI);
311
312 for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
313 insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS);
314 return true;
315 }
316 }
317
318 return false;
319}
320
321MachineBasicBlock *SILowerSGPRSpills::getCycleDomBB(MachineCycle *C) {
322 // If the insertion point lands on a cycle entry, move it to a block that
323 // dominates all entries.
324 if (C->isReducible()) {
325 if (auto *IDom = MDT->getNode(C->getHeader())->getIDom())
326 return IDom->getBlock();
327 llvm_unreachable("Expected cycle to have an IDom.");
328 return nullptr;
329 }
330
331 const SmallVectorImpl<MachineBasicBlock *> &Entries = C->getEntries();
332 assert(!Entries.empty() && "Expected cycle to have at least one entry.");
333 MachineBasicBlock *EntryBB = Entries[0];
334 for (unsigned I = 1; I < Entries.size(); ++I)
335 EntryBB = MDT->findNearestCommonDominator(EntryBB, Entries[I]);
336 return EntryBB;
337}
338
/// Update, for every lane VGPR used by the spills of frame index \p FI, the
/// point where its defining IMPLICIT_DEF must be inserted so that the def
/// dominates all of the VGPR's uses.
void SILowerSGPRSpills::updateLaneVGPRDomInstr(
    int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
    DenseMap<Register, LaneVGPRInsertPt> &LaneVGPRDomInstr) {
  // For the Def of a virtual LaneVGPR to dominate all its uses, we should
  // insert an IMPLICIT_DEF before the dominating spill. Switching to a
  // depth first order doesn't really help since the machine function can be in
  // the unstructured control flow post-SSA. For each virtual register, hence
  // finding the common dominator to get either the dominating spill or a block
  // dominating all spills.
  SIMachineFunctionInfo *FuncInfo =
      MBB->getParent()->getInfo<SIMachineFunctionInfo>();
  // NOTE(review): the declaration of 'VGPRSpills' (receiving the result of
  // the call below) appears elided in this view -- confirm against the full
  // source.
      FuncInfo->getSGPRSpillToVirtualVGPRLanes(FI);
  Register PrevLaneVGPR;
  for (auto &Spill : VGPRSpills) {
    // Skip consecutive entries for the same lane VGPR; each VGPR is handled
    // once per call.
    if (PrevLaneVGPR == Spill.VGPR)
      continue;

    PrevLaneVGPR = Spill.VGPR;
    auto I = LaneVGPRDomInstr.find(Spill.VGPR);
    if (Spill.Lane == 0 && I == LaneVGPRDomInstr.end()) {
      // First spill seen for this lane VGPR: its own point is the candidate.
      LaneVGPRDomInstr[Spill.VGPR] = insertPt(MBB, InsertPt);
    } else {
      assert(I != LaneVGPRDomInstr.end());
      LaneVGPRInsertPt Prev = I->second;
      MachineBasicBlock *PrevInsertMBB = Prev.MBB;
      MachineBasicBlock::iterator PrevInsertPt = Prev.It;
      MachineBasicBlock *DomMBB = PrevInsertMBB;
      if (DomMBB == MBB) {
        // The insertion point earlier selected in a predecessor block whose
        // spills are currently being lowered. The earlier InsertPt would be
        // the one just before the block terminator and it should be changed
        // if we insert any new spill in it.
        if (PrevInsertPt == MBB->end() ||
            MDT->dominates(&*InsertPt, &*PrevInsertPt))
          I->second = insertPt(MBB, InsertPt);

        continue;
      }

      // Find the common dominator block between PrevInsertPt and the
      // current spill.
      DomMBB = MDT->findNearestCommonDominator(DomMBB, MBB);

      if (DomMBB == MBB)
        I->second = insertPt(MBB, InsertPt);
      else if (DomMBB != PrevInsertMBB)
        I->second = insertPt(DomMBB, DomMBB->getFirstTerminator());
    }
  }
}
390
/// Pick the physical VGPRs to set aside for whole-wave (WWM) register
/// allocation and record them in \p RegMask.
void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
                                                      BitVector &RegMask) {
  // Determine an optimal number of VGPRs for WWM allocation. The complement
  // list will be available for allocating other VGPR virtual registers.
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  BitVector ReservedRegs = TRI->getReservedRegs(MF);
  BitVector NonWwmAllocMask(TRI->getNumRegs());
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  // FIXME: MaxNumVGPRsForWwmAllocation might need to be adjusted in the future
  // to have a balanced allocation between WWM values and per-thread vector
  // register operands.
  unsigned NumRegs = MaxNumVGPRsForWwmAllocation;
  // Never set aside more registers than there are lane VGPRs to allocate.
  NumRegs =
      std::min(static_cast<unsigned>(MFI->getSGPRSpillVGPRs().size()), NumRegs);

  auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(MF.getFunction());
  // Try to use the highest available registers for now. Later after
  // vgpr-regalloc, they can be shifted to the lowest range.
  unsigned I = 0;
  for (unsigned Reg = AMDGPU::VGPR0 + MaxNumVGPRs - 1;
       (I < NumRegs) && (Reg >= AMDGPU::VGPR0); --Reg) {
    // Only take registers that are neither reserved nor already in use.
    if (!ReservedRegs.test(Reg) &&
        !MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/true)) {
      TRI->markSuperRegs(RegMask, Reg);
      ++I;
    }
  }

  if (I != NumRegs) {
    // Reserve an arbitrary register and report the error.
    // NOTE(review): the emitError() call receiving the message below appears
    // elided in this view -- confirm against the full source.
    TRI->markSuperRegs(RegMask, AMDGPU::VGPR0);
        "cannot find enough VGPRs for wwm-regalloc");
  }
}
428
429bool SILowerSGPRSpillsLegacy::runOnMachineFunction(MachineFunction &MF) {
430 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
431 LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
432 auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
433 SlotIndexes *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
434 MachineDominatorTree *MDT =
435 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
436 MachineCycleInfo *MCI =
437 &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
438 return SILowerSGPRSpills(LIS, Indexes, MDT, MCI).run(MF);
439}
440
/// Main entry point: insert CSR SGPR spills, then lower all SGPR spill
/// pseudo-instructions to VGPR lane writes/reads where possible.
/// \returns true if the function was modified.
bool SILowerSGPRSpills::run(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();

  assert(SaveBlocks.empty() && RestoreBlocks.empty());

  // First, expose any CSR SGPR spills. This is mostly the same as what PEI
  // does, but somewhat simpler.
  calculateSaveRestoreBlocks(MF);
  SmallVector<int> CalleeSavedFIs;
  bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  // Nothing to lower: no stack objects and no CSR spill code was inserted.
  if (!MFI.hasStackObjects() && !HasCSRs) {
    SaveBlocks.clear();
    RestoreBlocks.clear();
    return false;
  }

  bool MadeChange = false;
  bool SpilledToVirtVGPRLanes = false;

  // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
  // handled as SpilledToReg in regular PrologEpilogInserter.
  const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
                                  (HasCSRs || FuncInfo->hasSpilledSGPRs());
  if (HasSGPRSpillToVGPR) {
    // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
    // are spilled to VGPRs, in which case we can eliminate the stack usage.
    //
    // This operates under the assumption that only other SGPR spills are users
    // of the frame index.

    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);

    // To track the IMPLICIT_DEF insertion point for the lane vgprs.
    DenseMap<Register, LaneVGPRInsertPt> LaneVGPRDomInstr;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        if (!TII->isSGPRSpill(MI))
          continue;

        // Spills of undef values can simply be deleted.
        if (MI.getOperand(0).isUndef()) {
          // NOTE(review): the statement guarded by this 'if (Indexes)'
          // appears elided in this view -- confirm against the full source.
          if (Indexes)
          MI.eraseFromParent();
          continue;
        }

        int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();

        bool IsCalleeSaveSGPRSpill = llvm::is_contained(CalleeSavedFIs, FI);
        if (IsCalleeSaveSGPRSpill) {
          // Spill callee-saved SGPRs into physical VGPR lanes.

          // TODO: This is to ensure the CFIs are static for efficient frame
          // unwinding in the debugger. Spilling them into virtual VGPR lanes
          // involve regalloc to allocate the physical VGPRs and that might
          // cause intermediate spill/split of such liveranges for successful
          // allocation. This would result in broken CFI encoding unless the
          // regalloc aware CFI generation to insert new CFIs along with the
          // intermediate spills is implemented. There is no such support
          // currently exist in the LLVM compiler.
          if (FuncInfo->allocateSGPRSpillToVGPRLane(
                  MF, FI, /*SpillToPhysVGPRLane=*/true)) {
            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
                MI, FI, nullptr, Indexes, LIS, true);
            // NOTE(review): the statement receiving this message appears
            // elided in this view -- confirm against the full source.
            if (!Spilled)
                  "failed to spill SGPR to physical VGPR lane when allocated");
          }
        } else {
          MachineInstrSpan MIS(&MI, &MBB);
          if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
                MI, FI, nullptr, Indexes, LIS);
            // NOTE(review): the statement receiving this message appears
            // elided in this view -- confirm against the full source.
            if (!Spilled)
                  "failed to spill SGPR to virtual VGPR lane when allocated");
            SpillFIs.set(FI);
            updateLaneVGPRDomInstr(FI, &MBB, MIS.begin(), LaneVGPRDomInstr);
            SpilledToVirtVGPRLanes = true;
          }
        }
      }
    }

    for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) {
      LaneVGPRInsertPt IP = LaneVGPRDomInstr[Reg];
      // If the chosen point sits inside a cycle, hoist it to a block
      // dominating all of the cycle's entries.
      if (MachineCycle *C = MCI->getTopLevelParentCycle(IP.MBB)) {
        MachineBasicBlock *AdjMBB = getCycleDomBB(C);
        IP = insertPt(AdjMBB, AdjMBB->getFirstTerminator());
      }
      // Insert the IMPLICIT_DEF at the identified points.
      MachineBasicBlock &Block = *IP.MBB;
      DebugLoc DL = Block.findDebugLoc(IP.It);
      auto MIB = BuildMI(Block, IP.It, DL, TII->get(AMDGPU::IMPLICIT_DEF), Reg);

      // Add WWM flag to the virtual register.
      // NOTE(review): the setFlag statement itself appears elided in this
      // view -- confirm against the full source.

      // Set SGPR_SPILL asm printer flag
      MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
      if (LIS) {
        LIS->InsertMachineInstrInMaps(*MIB);
      }
    }

    // Determine the registers for WWM allocation and also compute the register
    // mask for non-wwm VGPR allocation.
    if (FuncInfo->getSGPRSpillVGPRs().size()) {
      BitVector WwmRegMask(TRI->getNumRegs());

      determineRegsForWWMAllocation(MF, WwmRegMask);

      BitVector NonWwmRegMask(WwmRegMask);
      NonWwmRegMask.flip().clearBitsNotInMask(TRI->getAllVGPRRegMask());

      // The complement set will be the registers for non-wwm (per-thread) vgpr
      // allocation.
      FuncInfo->updateNonWWMRegMask(NonWwmRegMask);
    }

    for (MachineBasicBlock &MBB : MF)
      clearDebugInfoForSpillFIs(MFI, MBB, SpillFIs);

    // All those frame indices which are dead by now should be removed from the
    // function frame. Otherwise, there is a side effect such as re-mapping of
    // free frame index ids by the later pass(es) like "stack slot coloring"
    // which in turn could mess-up with the book keeping of "frame index to VGPR
    // lane".
    FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);

    MadeChange = true;
  }

  if (SpilledToVirtVGPRLanes) {
    const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
    // Shift back the reserved SGPR for EXEC copy into the lowest range.
    // This SGPR is reserved to handle the whole-wave spill/copy operations
    // that might get inserted during vgpr regalloc.
    Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF);
    if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) <
                             TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy()))
      FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR);
  } else {
    // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM
    // spills/copies. Reset the SGPR reserved for EXEC copy.
    FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
  }

  SaveBlocks.clear();
  RestoreBlocks.clear();

  return MadeChange;
}
605
/// New pass-manager entry point: fetch cached optional analyses and run the
/// shared implementation.
// NOTE(review): the signature lines naming this function and its
// MF/MFAM parameters, and the MDT/MCI analysis lookups, appear elided in
// this view -- confirm against the full source.
PreservedAnalyses
  MFPropsModifier _(*this, MF);
  // LiveIntervals/SlotIndexes are used only if already cached (may be null).
  auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
  auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(MF);
  SILowerSGPRSpills(LIS, Indexes, MDT, &MCI).run(MF);
  return PreservedAnalyses::all();
}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static void insertCSRRestores(MachineBasicBlock &RestoreBlock, std::vector< CalleeSavedInfo > &CSI)
Insert restore code for the callee-saved registers used in the function.
SmallVector< MachineBasicBlock *, 4 > MBBVector
static void insertCSRSaves(MachineBasicBlock &SaveBlock, ArrayRef< CalleeSavedInfo > CSI)
Insert spill code for the callee-saved registers used in the function.
static void updateLiveness(MachineFunction &MF)
Helper function to update the liveness information for the callee-saved registers.
This file declares the machine register scavenger class.
static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB, const TargetRegisterInfo *TRI)
static void insertCSRRestores(MachineBasicBlock &RestoreBlock, MutableArrayRef< CalleeSavedInfo > CSI, SlotIndexes *Indexes, LiveIntervals *LIS)
Insert restore code for the callee-saved registers used in the function.
static void insertCSRSaves(MachineBasicBlock &SaveBlock, ArrayRef< CalleeSavedInfo > CSI, SlotIndexes *Indexes, LiveIntervals *LIS)
Insert spill code for the callee-saved registers used in the function.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool test(unsigned Idx) const
Definition BitVector.h:480
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
CycleT * getTopLevelParentCycle(const BlockT *Block) const
const HexagonRegisterInfo & getRegisterInfo() const
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
An RAII based helper class to modify MachineFunctionProperties when running pass.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
LLVM_ABI void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Legacy analysis pass which computes a MachineCycleInfo.
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setCalleeSavedInfoValid(bool v)
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
const SaveRestorePoints & getRestorePoints() const
const SaveRestorePoints & getSavePoints() const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
MachineInstrSpan provides an interface to get an iteration range containing the instruction it was in...
MachineBasicBlock::iterator begin()
Representation of each machine instruction.
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
Wrapper class representing virtual and physical registers.
Definition Register.h:20
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
void setFlag(Register Reg, uint8_t Flag)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
void updateNonWWMRegMask(BitVector &RegMask)
ArrayRef< Register > getSGPRSpillVGPRs() const
SlotIndexes pass.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
LLVM_ABI void removeMachineInstrFromMaps(MachineInstr &MI, bool AllowBundled=false)
Removes machine instruction (bundle) MI from the mapping.
LLVM_ABI void repairIndexesInRange(MachineBasicBlock *MBB, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End)
Repair indexes after adding and removing instructions.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Information about stack frame layout on the target.
void restoreCalleeSavedRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
void clearDebugInfoForSpillFIs(MachineFrameInfo &MFI, MachineBasicBlock &MBB, const BitVector &SpillFIs)
Replace frame index operands with null registers in debug value instructions for the specified spill ...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
char & SILowerSGPRSpillsLegacyID
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
MachineCycleInfo::CycleT MachineCycle