//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPU.h"
#include "AMDGPULaneMaskUtils.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

25 "amdgpu-spill-vgpr-to-agpr",
26 cl::desc("Enable spilling VGPRs to AGPRs"),
28 cl::init(true));
29
// Find a register matching \p RC from \p LiveUnits which is unused and
// available throughout the function. On failure, returns AMDGPU::NoRegister.
// TODO: Rewrite the loop here to iterate over MCRegUnits instead of
// MCRegisters. This should reduce the number of iterations and avoid redundant
// checking.
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
                                     const LiveRegUnits &LiveUnits,
                                     const TargetRegisterClass &RC) {
  for (MCRegister Reg : RC) {
    if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
        !MRI.isReserved(Reg))
      return Reg;
  }
  return MCRegister();
}

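// Note (explanatory, not from the original source): this helper is stricter
// than a liveness check alone. MRI.isPhysRegUsed(Reg) rejects registers used
// *anywhere* in the function, while LiveUnits.available(Reg) only checks the
// current program point; both must hold for a register that has to remain
// valid throughout the function.
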
// Find a scratch register that we can use in the prologue. We avoid using
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
static MCRegister findScratchNonCalleeSaveRegister(
    MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
    const TargetRegisterClass &RC, bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveUnits.addReg(CSRegs[i]);

  // We are looking for a register that can be used throughout the entire
  // function, so any use is unacceptable.
  if (Unused)
    return findUnusedRegister(MRI, LiveUnits, RC);

  for (MCRegister Reg : RC) {
    if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
      return Reg;
  }

  return MCRegister();
}

/// Query target location for spilling SGPRs
/// \p IncludeScratchCopy : Also look for free scratch SGPRs
static void getVGPRSpillLaneOrTempRegister(
    MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
    const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
    bool IncludeScratchCopy = true) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);

  // We need to save and restore the given SGPR.

  Register ScratchSGPR;
  // 1: Try to save the given register into an unused scratch SGPR. The
  // LiveUnits should have all the callee saved registers marked as used. For
  // certain cases we skip copy to scratch SGPR.
  if (IncludeScratchCopy)
    ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);

  if (!ScratchSGPR) {
    int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
                                         TargetStackID::SGPRSpill);

    if (TRI->spillSGPRToVGPR() &&
        MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
                                         /*IsPrologEpilog=*/true)) {
      // 2: There's no free lane to spill, and no free register to save the
      // SGPR, so we're forced to take another VGPR to use for the spill.
      MFI->addToPrologEpilogSGPRSpills(
          SGPR, PrologEpilogSGPRSaveRestoreInfo(
                    SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));

      LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
                 dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
                        << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
                        << '\n';);
    } else {
      // Remove dead <FI> index
      FrameInfo.RemoveStackObject(FI);
      // 3: If all else fails, spill the register to memory.
      FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
      MFI->addToPrologEpilogSGPRSpills(
          SGPR,
          PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::SPILL_TO_MEM, FI));
      LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
                        << printReg(SGPR, TRI) << '\n');
    }
  } else {
    MFI->addToPrologEpilogSGPRSpills(
        SGPR, PrologEpilogSGPRSaveRestoreInfo(
                  SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
    LiveUnits.addReg(ScratchSGPR);
    LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
                      << printReg(ScratchSGPR, TRI) << '\n');
  }
}

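// Summary of the fallback order implemented above (explanatory note): (1) copy
// the SGPR into a free scratch SGPR if one exists, (2) otherwise spill it to a
// physical VGPR lane, (3) otherwise spill it to scratch memory. For example,
// saving the FP in a function with no free SGPRs but a free VGPR lane takes
// path (2) and records a SPILL_TO_VGPR_LANE entry for the frame index.
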
// We need to specially emit stack operations here because a different frame
// register is used than in the rest of the function, as getFrameRegister would
// use.
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                             const SIMachineFunctionInfo &FuncInfo,
                             LiveRegUnits &LiveUnits, MachineFunction &MF,
                             MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I, const DebugLoc &DL,
                             Register SpillReg, int FI, Register FrameReg,
                             int64_t DwordOff = 0) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                        : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  LiveUnits.addReg(SpillReg);
  bool IsKill = !MBB.isLiveIn(SpillReg);
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
                          DwordOff, MMO, nullptr, &LiveUnits);
  if (IsKill)
    LiveUnits.removeReg(SpillReg);
}

static void buildEpilogRestore(const GCNSubtarget &ST,
                               const SIRegisterInfo &TRI,
                               const SIMachineFunctionInfo &FuncInfo,
                               LiveRegUnits &LiveUnits, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL, Register SpillReg, int FI,
                               Register FrameReg, int64_t DwordOff = 0) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                        : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
                          DwordOff, MMO, nullptr, &LiveUnits);
}

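// Explanatory note: these helpers store/load one dword at a time, so callers
// spilling a multi-dword register advance DwordOff by 4 per 32-bit piece. For
// instance, saving a 64-bit SGPR pair through a temporary VGPR issues two
// dword stores at offsets DwordOff and DwordOff + 4 within the same frame
// index (see PrologEpilogSGPRSpillBuilder::saveToMemory below).
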
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, const SIInstrInfo *TII,
                        Register TargetReg) {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);

  if (MFI->getGITPtrHigh() != 0xffffffff) {
    BuildMI(MBB, I, DL, SMovB32, TargetHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(TargetReg, RegState::ImplicitDefine);
  } else {
    const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
    BuildMI(MBB, I, DL, GetPC64, TargetReg);
  }
  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
  MF->getRegInfo().addLiveIn(GitPtrLo);
  MBB.addLiveIn(GitPtrLo);
  BuildMI(MBB, I, DL, SMovB32, TargetLo)
      .addReg(GitPtrLo);
}

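// Explanatory note: the Global Information Table (GIT) pointer is assembled
// from two halves. The low 32 bits arrive in an SGPR chosen by getGITPtrLoReg;
// the high 32 bits come from the amdgpu-git-ptr-high function attribute when
// the frontend provides one (any value other than the 0xffffffff sentinel),
// and otherwise from the program counter via S_GETPC_B64, on the assumption
// that the GIT lives in the same 4 GiB region as the shader code.
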
static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
                          const SIMachineFunctionInfo *FuncInfo,
                          MachineFunction &MF, MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, bool IsProlog) {
  if (LiveUnits.empty()) {
    LiveUnits.init(TRI);
    if (IsProlog) {
      LiveUnits.addLiveIns(MBB);
    } else {
      // In epilog.
      LiveUnits.addLiveOuts(MBB);
      LiveUnits.stepBackward(*MBBI);
    }
  }
}

namespace llvm {

// SpillBuilder to save/restore special SGPR spills like the one needed for FP,
// BP, etc. These spills are delayed until the current function's frame is
// finalized. For a given register, the builder uses the
// PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
class PrologEpilogSGPRSpillBuilder {
  MachineBasicBlock::iterator MI;
  MachineBasicBlock &MBB;
  MachineFunction &MF;
  const GCNSubtarget &ST;
  MachineFrameInfo &MFI;
  SIMachineFunctionInfo *FuncInfo;
  const SIInstrInfo *TII;
  const SIRegisterInfo &TRI;
  Register SuperReg;
  const PrologEpilogSGPRSaveRestoreInfo SI;
  LiveRegUnits &LiveUnits;
  const DebugLoc &DL;
  Register FrameReg;
  ArrayRef<int16_t> SplitParts;
  unsigned NumSubRegs;
  unsigned EltSize = 4;

  void saveToMemory(const int FI) const {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    assert(!MFI.isDeadObjectIndex(FI));

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);

    Register TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
          .addReg(SubReg);

      buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
                       FI, FrameReg, DwordOff);
      DwordOff += 4;
    }
  }

  void saveToVGPRLane(const int FI) const {
    assert(!MFI.isDeadObjectIndex(FI));

    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
              Spill[I].VGPR)
          .addReg(SubReg)
          .addImm(Spill[I].Lane)
          .addReg(Spill[I].VGPR, RegState::Undef);
    }
  }

  void copyToScratchSGPR(Register DstReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
        .addReg(SuperReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  void restoreFromMemory(const int FI) {
    MachineRegisterInfo &MRI = MF.getRegInfo();

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
    Register TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));

      buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
                         TmpVGPR, FI, FrameReg, DwordOff);
      MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass);
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
          .addReg(TmpVGPR, RegState::Kill);
      DwordOff += 4;
    }
  }

  void restoreFromVGPRLane(const int FI) {
    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
          .addReg(Spill[I].VGPR)
          .addImm(Spill[I].Lane);
    }
  }

  void copyFromScratchSGPR(Register SrcReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
        .addReg(SrcReg)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

public:
  PrologEpilogSGPRSpillBuilder(Register Reg,
                               const PrologEpilogSGPRSaveRestoreInfo SI,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, const SIInstrInfo *TII,
                               const SIRegisterInfo &TRI,
                               LiveRegUnits &LiveUnits, Register FrameReg)
      : MI(MI), MBB(MBB), MF(*MBB.getParent()),
        ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
        FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
        SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
        FrameReg(FrameReg) {
    const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
    SplitParts = TRI.getRegSplitParts(RC, EltSize);
    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

    assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
  }

  void save() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return saveToMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return saveToVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyToScratchSGPR(SI.getReg());
    }
  }

  void restore() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return restoreFromMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return restoreFromVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyFromScratchSGPR(SI.getReg());
    }
  }
};

} // namespace llvm

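// Usage sketch (explanatory note): emitCSRSpillStores/emitCSRSpillRestores
// below construct one builder per recorded prolog/epilog SGPR spill, e.g.
//   PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII,
//                                   TRI, LiveUnits, FrameReg);
//   SB.save();   // in the prologue; SB.restore() in the epilogue
// and the recorded PrologEpilogSGPRSaveRestoreInfo kind picks the method.
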
// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScrInitLo;
  Register FlatScrInitHi;

  if (ST.isAmdPalOS()) {
    // Extract the scratch offset from the descriptor in the GIT
    LiveRegUnits LiveUnits;
    LiveUnits.init(*TRI);
    LiveUnits.addLiveIns(MBB);

    // Find unused reg to load flat scratch init into
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register FlatScrInit = AMDGPU::NoRegister;
    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
    AllSGPR64s = AllSGPR64s.slice(
        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPR64s) {
      if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
          MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
        FlatScrInit = Reg;
        break;
      }
    }
    assert(FlatScrInit && "Failed to find free register for scratch init");

    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);

    buildGitPtr(MBB, I, DL, TII, FlatScrInit);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        8, Align(4));
    unsigned Offset =
        MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
        .addReg(FlatScrInit)
        .addImm(EncodedOffset) // offset
        .addImm(0) // cpol
        .addMemOperand(MMO);

    // Mask the offset in [47:0] of the descriptor
    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
    auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
                   .addReg(FlatScrInitHi)
                   .addImm(0xffff);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
  } else {
    Register FlatScratchInitReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
    assert(FlatScratchInitReg);

    MachineRegisterInfo &MRI = MF.getRegInfo();
    MRI.addLiveIn(FlatScratchInitReg);
    MBB.addLiveIn(FlatScratchInitReg);

    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  }

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
          .addReg(FlatScrInitLo)
          .addReg(ScratchWaveOffsetReg);
      auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                          FlatScrInitHi)
                      .addReg(FlatScrInitHi)
                      .addImm(0);
      Addc->getOperand(3).setIsDead(); // Mark SCC as dead.

      using namespace AMDGPU::Hwreg;
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitLo)
          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitHi)
          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
      return;
    }

    // For GFX9.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
    auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                        AMDGPU::FLAT_SCR_HI)
                    .addReg(FlatScrInitHi)
                    .addImm(0);
    Addc->getOperand(3).setIsDead(); // Mark SCC as dead.

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
                      AMDGPU::FLAT_SCR_HI)
                  .addReg(FlatScrInitLo, RegState::Kill)
                  .addImm(8);
  LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
}

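// Worked example (explanatory note) for the pre-GFX9 path above: if the
// per-wave scratch base offset in FlatScrInitLo plus the wave offset is
// 0x11100 bytes, the shift by 8 stores 0x111 into FLAT_SCR_HI, i.e. the base
// expressed in the hardware's 256-byte granularity, while FLAT_SCR_LO holds
// the scratch size in bytes.
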
// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      MRI.reserveReg(Reg, TRI);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}

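// Explanatory note: without flat scratch, scratch is addressed through the
// buffer RSRC with wave-level offsets, so per-lane byte counts are scaled by
// the wavefront size. E.g. a 16-byte-per-lane frame on a wave64 target bumps
// SP by 16 * 64 = 1024, whereas with flat scratch the scale factor is 1.
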
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found clobbers the scratch
  // wave offset, which may be in a fixed SGPR or a free SGPR chosen by
  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
  // free SGPR.
  Register ScratchWaveOffsetReg;
  if (PreloadedScratchWaveOffsetReg &&
      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }

    // FIXME: We can spill incoming arguments and restore at the end of the
    // prolog.
    if (!ScratchWaveOffsetReg)
      report_fatal_error(
          "could not find temporary scratch offset register in prolog");
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);

  unsigned Offset = FrameInfo.getStackSize() * getScratchScaleFactor(ST);
  if (!mayReserveScratchForCWSR(MF)) {
    if (hasFP(MF)) {
      Register FPReg = MFI->getFrameOffsetReg();
      assert(FPReg != AMDGPU::FP_REG);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
    }

    if (requiresStackPointerReference(MF)) {
      Register SPReg = MFI->getStackPtrOffsetReg();
      assert(SPReg != AMDGPU::SP_REG);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
    }
  } else {
    // We need to check if we're on a compute queue - if we are, then the CWSR
    // trap handler may need to store some VGPRs on the stack. The first VGPR
    // block is saved separately, so we only need to allocate space for any
    // additional VGPR blocks used. For now, we will make sure there's enough
    // room for the theoretical maximum number of VGPRs that can be allocated.
    // FIXME: Figure out if the shader uses fewer VGPRs in practice.
    assert(hasFP(MF));
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    unsigned VGPRSize = llvm::alignTo(
        (ST.getAddressableNumVGPRs(MFI->getDynamicVGPRBlockSize()) -
         AMDGPU::IsaInfo::getVGPRAllocGranule(&ST,
                                              MFI->getDynamicVGPRBlockSize())) *
            4,
        FrameInfo.getMaxAlign());
    MFI->setScratchReservedForDynamicVGPRs(VGPRSize);

    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_GETREG_B32), FPReg)
        .addImm(AMDGPU::Hwreg::HwregEncoding::encode(
            AMDGPU::Hwreg::ID_HW_ID2, AMDGPU::Hwreg::OFFSET_ME_ID, 2));
    // The MicroEngine ID is 0 for the graphics queue, and 1 or 2 for compute
    // (3 is unused, so we ignore it). Unfortunately, S_GETREG doesn't set
    // SCC, so we need to check for 0 manually.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMP_LG_U32)).addImm(0).addReg(FPReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMOVK_I32), FPReg).addImm(VGPRSize);
    if (requiresStackPointerReference(MF)) {
      Register SPReg = MFI->getStackPtrOffsetReg();
      assert(SPReg != AMDGPU::SP_REG);

      // If at least one of the constants can be inlined, then we can use
      // s_cselect. Otherwise, use a mov and cmovk.
      if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm()) ||
          AMDGPU::isInlinableLiteral32(Offset + VGPRSize,
                                       ST.hasInv2PiInlineImm())) {
        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CSELECT_B32), SPReg)
            .addImm(Offset + VGPRSize)
            .addImm(Offset);
      } else {
        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMOVK_I32), SPReg)
            .addImm(Offset + VGPRSize);
      }
    }
  }

  bool NeedsFlatScratchInit =
      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
        .addReg(Rsrc01)
        .addImm(EncodedOffset) // offset
        .addImm(0) // cpol
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
        .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of
    // descriptor / bits 22:21 of third sub-reg will be 0b11)
    // If the shader is actually wave32 we have to modify the const_index_stride
    // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
    // reason the driver does this is that there can be cases where it presents
    // 2 shaders with different wave size (e.g. VsFs).
    // TODO: convert to using SCRATCH instructions or multiple SRD buffers
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
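    // Worked bit example (explanatory note): const_index_stride lives in bits
    // 22:21 of Rsrc03. A wave64 SRD has 0b11 there; S_BITSET0_B32 with source
    // operand 21 clears bit 21, leaving 0b10, the encoding for stride=32.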
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->getUserSGPRInfo().hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(Fn.getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto *MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addImm(0) // offset
            .addImm(0) // cpol
            .addMemOperand(MMO)
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
          .addExternalSymbol("SCRATCH_RSRC_DWORD0")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
          .addExternalSymbol("SCRATCH_RSRC_DWORD1")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
        .addImm(Lo_32(Rsrc23))
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
        .addImm(Hi_32(Rsrc23))
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
                  .addReg(ScratchRsrcSub1)
                  .addImm(0)
                  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::ScalableVector:
  case TargetStackID::WasmLocal:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

// Activate only the inactive lanes when \p EnableInactiveLanes is true.
// Otherwise, activate all lanes. It returns the saved exec.
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, bool IsProlog,
                                     bool EnableInactiveLanes) {
  Register ScratchExecCopy;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);

  if (FuncInfo->isWholeWaveFunction()) {
    // Whole wave functions already have a copy of the original EXEC mask that
    // we can use.
    assert(IsProlog && "Epilog should look at return, not setup");
    ScratchExecCopy =
        TII->getWholeWaveFunctionSetup(MF)->getOperand(0).getReg();
    assert(ScratchExecCopy && "Couldn't find copy of EXEC");
  } else {
    ScratchExecCopy = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, *TRI.getWaveMaskRegClass());
  }

  if (!ScratchExecCopy)
    report_fatal_error("failed to find free scratch register");

  LiveUnits.addReg(ScratchExecCopy);

  const unsigned SaveExecOpc =
      ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
                                           : AMDGPU::S_OR_SAVEEXEC_B32)
                    : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
                                           : AMDGPU::S_OR_SAVEEXEC_B64);
  auto SaveExec =
      BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
  SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.

  return ScratchExecCopy;
}

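// Explanatory note on the opcode choice above: S_{OR,XOR}_SAVEEXEC first copy
// the current EXEC into the destination, then combine it with the -1 operand.
// OR with all-ones activates every lane; XOR with all-ones inverts EXEC so
// that exactly the previously *inactive* lanes run, which is what the WWM
// scratch-register saves want.
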
void SIFrameLowering::emitCSRSpillStores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const AMDGPU::LaneMaskConstants &LMC = AMDGPU::LaneMaskConstants::get(ST);

  // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
  // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
  // might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ true, /*EnableInactiveLanes*/ true);

  auto StoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                           VGPR, FI, FrameReg);
        }
      };

  for (const Register Reg : make_first_range(WWMScratchRegs)) {
    if (!MRI.isReserved(Reg)) {
      MRI.addLiveIn(Reg);
      MBB.addLiveIn(Reg);
    }
  }
  StoreWWMRegisters(WWMScratchRegs);

  auto EnableAllLanes = [&]() {
    BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addImm(-1);
  };

  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      EnableAllLanes();
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ true,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  StoreWWMRegisters(WWMCalleeSavedRegs);
  if (FuncInfo->isWholeWaveFunction()) {
    // SI_WHOLE_WAVE_FUNC_SETUP has outlived its purpose, so we can remove
    // it now. If we have already saved some WWM CSR registers, then the EXEC is
    // already -1 and we don't need to do anything else. Otherwise, set EXEC to
    // -1 here.
    if (!ScratchExecCopy)
      buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL, /*IsProlog*/ true,
                           /*EnableInactiveLanes*/ true);
    else if (WWMCalleeSavedRegs.empty())
      EnableAllLanes();
    TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
  } else if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveUnits.addReg(ScratchExecCopy);
  }

  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP spill:
    // Skip if FP is saved to a scratch SGPR, the save has already been emitted.
    // Otherwise, FP has been moved to a temporary register and spill it
    // instead.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.save();
  }

  // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
  // such scratch registers live throughout the function.
  SmallVector<Register, 1> ScratchSGPRs;
  FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
  if (!ScratchSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : ScratchSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveUnits.empty()) {
      for (MCPhysReg Reg : ScratchSGPRs)
        LiveUnits.addReg(Reg);
    }
  }
}

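// Ordering note (explanatory): the prologue stores run as (1) WWM scratch
// VGPRs with only the inactive lanes enabled, (2) WWM callee-saved VGPRs with
// all lanes enabled, (3) EXEC restored from the saved copy, then (4) the
// delayed prolog/epilog SGPR spills via PrologEpilogSGPRSpillBuilder.
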
void SIFrameLowering::emitCSRSpillRestores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  const AMDGPU::LaneMaskConstants &LMC = AMDGPU::LaneMaskConstants::get(ST);
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP restore:
    // Skip if FP needs to be restored from the scratch SGPR. Otherwise, restore
    // the FP value to a temporary register. The frame pointer should be
    // overwritten only at the end when all other spills are restored from
    // current frame.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.restore();
  }

  // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
  // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to
  // this, we might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  auto RestoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                             VGPR, FI, FrameReg);
        }
      };

  if (FuncInfo->isWholeWaveFunction()) {
    // For whole wave functions, the EXEC is already -1 at this point.
    // Therefore, we can restore the CSR WWM registers right away.
    RestoreWWMRegisters(WWMCalleeSavedRegs);

    // The original EXEC is the first operand of the return instruction.
    MachineInstr &Return = MBB.instr_back();
    unsigned Opcode = Return.getOpcode();
    switch (Opcode) {
    case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
      Opcode = AMDGPU::SI_RETURN;
      break;
    case AMDGPU::SI_TCRETURN_GFX_WholeWave:
      Opcode = AMDGPU::SI_TCRETURN_GFX;
      break;
    default:
      llvm_unreachable("Unexpected return inst");
    }
    Register OrigExec = Return.getOperand(0).getReg();

    if (!WWMScratchRegs.empty()) {
      BuildMI(MBB, MBBI, DL, TII->get(LMC.XorOpc), LMC.ExecReg)
          .addReg(OrigExec)
          .addImm(-1);
      RestoreWWMRegisters(WWMScratchRegs);
    }

    // Restore original EXEC.
    BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addReg(OrigExec);

    // Drop the first operand and update the opcode.
    Return.removeOperand(0);
    Return.setDesc(TII->get(Opcode));

    return;
  }

  if (!WWMScratchRegs.empty()) {
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog=*/false, /*EnableInactiveLanes=*/true);
  }
  RestoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addImm(-1);
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ false,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  RestoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LiveRegUnits LiveUnits;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  // DebugLoc must be unknown since the first instruction with DebugLoc is used
  // to determine the end of the prologue.
  DebugLoc DL;

  if (FuncInfo->isChainFunction()) {
    // Functions with the amdgpu_cs_chain[_preserve] CC don't receive a SP, but
    // are free to set one up if they need it.
    bool UseSP = requiresStackPointerReference(MF);
    if (UseSP) {
      assert(StackPtrReg != AMDGPU::SP_REG);

      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg)
          .addImm(MFI.getStackSize() * getScratchScaleFactor(ST));
    }
  }

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;

  if (TRI.hasStackRealignment(MF))
    HasFP = true;

  Register FramePtrRegScratchCopy;
  if (!HasFP && !hasFP(MF)) {
    // Emit the CSR spill stores with SP base register.
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits,
                       FuncInfo->isChainFunction() ? Register() : StackPtrReg,
                       FramePtrRegScratchCopy);
  } else {
    // CSR spill stores will use FP as base register.
    Register SGPRForFPSaveRestoreCopy =
        FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
    if (SGPRForFPSaveRestoreCopy) {
      // Copy FP to the scratch register now and emit the CFI entry. It avoids
      // the extra FP copy needed in the other two cases when FP is spilled to
      // memory or to a VGPR lane.
      PrologEpilogSGPRSpillBuilder SB(
          FramePtrReg,
          FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
          DL, TII, TRI, LiveUnits, FramePtrReg);
      SB.save();
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      // Copy FP into a new scratch register so that its previous value can be
      // spilled after setting up the new frame.
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
          .addReg(FramePtrReg);
    }
  }

  if (HasFP) {
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveUnits.empty()) {
      LiveUnits.init(TRI);
      LiveUnits.addLiveIns(MBB);
    }

    // s_add_i32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
                   .addReg(FramePtrReg, RegState::Kill)
                   .addImm(-Alignment * getScratchScaleFactor(ST))
                   .setMIFlag(MachineInstr::FrameSetup);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }
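
  // Worked example (explanatory note) for the realignment above: with
  // MaxAlign = 16 bytes on wave64 buffer scratch, the scale factor is 64, so
  // FP = (SP + 15 * 64) & ~(16 * 64 - 1), i.e. SP rounded up to a
  // 1024-byte-aligned wave offset, which is 16-byte alignment per lane.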

  // If FP is used, emit the CSR spills with FP base register.
  if (HasFP) {
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                       FramePtrRegScratchCopy);
    if (FramePtrRegScratchCopy)
      LiveUnits.removeReg(FramePtrRegScratchCopy);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
                   .addReg(StackPtrReg)
                   .addImm(RoundedSize * getScratchScaleFactor(ST))
                   .setMIFlag(MachineInstr::FrameSetup);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
  (void)FPSaved;
  assert((!HasFP || FPSaved) &&
         "Needed to save FP but didn't save it anywhere");

  // If we allow spilling to AGPRs we may have saved FP but then spill
  // everything into AGPRs instead of the stack.
  assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) &&
         "Saved FP but didn't need it");

  bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
  (void)BPSaved;
  assert((!HasBP || BPSaved) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  LiveRegUnits LiveUnits;
  // Get the insert location for the epilogue. If there were no terminators in
  // the block, get the last instruction.
  MachineBasicBlock::iterator MBBI = MBB.end();
  DebugLoc DL;
  if (!MBB.empty()) {
    MBBI = MBB.getLastNonDebugInstr();
    if (MBBI != MBB.end())
      DL = MBBI->getDebugLoc();

    MBBI = MBB.getFirstTerminator();
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);

  if (RoundedSize != 0) {
    if (TRI.hasBasePointer(MF)) {
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
          .addReg(TRI.getBaseRegister())
          .setMIFlag(MachineInstr::FrameDestroy);
    } else if (hasFP(MF)) {
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
          .addReg(FramePtrReg)
          .setMIFlag(MachineInstr::FrameDestroy);
    }
  }

  Register FramePtrRegScratchCopy;
  Register SGPRForFPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
  if (FPSaved) {
    // CSR spill restores should use FP as base register. If
    // SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
    // into a new scratch register and copy to FP later when other registers are
    // restored from the current stack frame.
    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
    if (SGPRForFPSaveRestoreCopy) {
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
    }

    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                         FramePtrRegScratchCopy);
  }

  if (FPSaved) {
    // Insert the copy to restore FP.
    Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
                                               : FramePtrRegScratchCopy;
    assert(SrcReg);
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(SrcReg);
    if (SGPRForFPSaveRestoreCopy)
      LiveUnits.removeReg(SGPRForFPSaveRestoreCopy);
  } else {
    // Insert the CSR spill restores with SP as the base register.
    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits,
                         FuncInfo->isChainFunction() ? Register() : StackPtrReg,
                         FramePtrRegScratchCopy);
  }
}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        !FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) {
      return false;
    }
  }

  return true;
}
#endif

StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                    int FI,
                                                    Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF,
    RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
                               && EnableSpillVGPRToAGPR;

  if (SpillVGPRToAGPR) {
    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
    BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);

    bool SeenDbgInstr = false;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        int FrameIndex;
        if (MI.isDebugInstr())
          SeenDbgInstr = true;

        if (TII->isVGPRSpill(MI)) {
          // Try to eliminate stack used by VGPR spills before frame
          // finalization.
          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                     AMDGPU::OpName::vaddr);
          int FI = MI.getOperand(FIOp).getIndex();
          Register VReg =
              TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
                                                TRI->isAGPR(MRI, VReg))) {
            assert(RS != nullptr);
            RS->enterBasicBlockEnd(MBB);
            RS->backward(std::next(MI.getIterator()));
            TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
            SpillFIs.set(FI);
            continue;
          }
        } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
                   TII->isLoadFromStackSlot(MI, FrameIndex))
          if (!MFI.isFixedObjectIndex(FrameIndex))
            NonVGPRSpillFIs.set(FrameIndex);
      }
    }

    // Stack slot coloring may assign different objects to the same stack slot.
    // If not, then the VGPR to AGPR spill slot is dead.
    for (unsigned FI : SpillFIs.set_bits())
      if (!NonVGPRSpillFIs.test(FI))
        FuncInfo->setVGPRToAGPRSpillDead(FI);

    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
        MBB.addLiveIn(Reg);

      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();

      if (!SpillFIs.empty() && SeenDbgInstr) {
        // FIXME: The dead frame indices are replaced with a null register from
        // the debug value instructions. We should instead update it with the
        // correct register value. But not sure the register value alone is
        // enough to lower the DIExpression. It should be worked out later.
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugValue()) {
            uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
            if (MI.getOperand(StackOperandIdx).isFI() &&
                !MFI.isFixedObjectIndex(
                    MI.getOperand(StackOperandIdx).getIndex()) &&
                SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
              MI.getOperand(StackOperandIdx)
                  .ChangeToRegister(Register(), false /*isDef*/);
            }
          }
        }
      }
    }
  }

  // At this point we've already allocated all spilled SGPRs to VGPRs if we
  // can. Any remaining SGPR spills will go to memory, so move them back to the
  // default stack.
  bool HaveSGPRToVMemSpill =
      FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    // Add an emergency spill slot
    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));

    // If we are spilling SGPRs to memory with a large frame, we may need a
    // second VGPR emergency frame index.
    if (HaveSGPRToVMemSpill &&
        allocateScavengingFrameIndexesNearIncomingSP(MF)) {
      RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false));
    }
  }
}

void SIFrameLowering::processFunctionAfterFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    // On gfx908, we had initially reserved the highest available VGPR for AGPR
    // copies. Now that we are done with RA, check whether there exists an
    // unused VGPR that is lower than the one reserved before RA. If one
    // exists, use it for the AGPR copy instead of the one reserved before RA.
    Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
    Register UnusedLowVGPR =
        TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
                          TRI->getHWRegIndex(VGPRForAGPRCopy))) {
      // Reserve this newly identified VGPR for the AGPR copy. Reserved
      // registers should already be frozen at this point, so we can avoid
      // calling MRI.freezeReservedRegs and just use MRI.reserveReg.
      FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
      MRI.reserveReg(UnusedLowVGPR, TRI);
    }
  }
  // We initially reserved the highest available SGPR pair for long branches;
  // now, after RA, we shift down to a lower unused one if one exists.
  Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
  Register UnusedLowSGPR =
      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
  // If LongBranchReservedReg is null then we didn't find a long branch
  // and never reserved a register to begin with, so there is nothing to
  // shift down. Then if UnusedLowSGPR is null, there isn't an available lower
  // register to use, so just keep the original one we set.
  if (LongBranchReservedReg && UnusedLowSGPR) {
    FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
    MRI.reserveReg(UnusedLowSGPR, TRI);
  }
}
1605
1606// The special SGPR spills like the one needed for FP, BP or any reserved
1607// registers delayed until frame lowering.
1609 MachineFunction &MF, BitVector &SavedVGPRs,
1610 bool NeedExecCopyReservedReg) const {
1611 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1614 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1615 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1616 LiveRegUnits LiveUnits;
1617 LiveUnits.init(*TRI);
1618 // Initially mark callee saved registers as used so we will not choose them
1619 // while looking for scratch SGPRs.
1620 const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
1621 for (unsigned I = 0; CSRegs[I]; ++I)
1622 LiveUnits.addReg(CSRegs[I]);
1623
1624 const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
1625
1626 Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy();
1627 if (NeedExecCopyReservedReg ||
1628 (ReservedRegForExecCopy &&
1629 MRI.isPhysRegUsed(ReservedRegForExecCopy, /*SkipRegMaskTest=*/true))) {
1630 MRI.reserveReg(ReservedRegForExecCopy, TRI);
1631 Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
1632 if (UnusedScratchReg) {
1633 // If found any unused scratch SGPR, reserve the register itself for Exec
1634 // copy and there is no need for any spills in that case.
1635 MFI->setSGPRForEXECCopy(UnusedScratchReg);
1636 MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
1637 LiveUnits.addReg(UnusedScratchReg);
1638 } else {
1639 // Needs spill.
1640 assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) &&
1641 "Re-reserving spill slot for EXEC copy register");
1642 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC,
1643 /*IncludeScratchCopy=*/false);
1644 }
1645 } else if (ReservedRegForExecCopy) {
1646 // Reset it at this point, since no whole-wave copies or spills were
1647 // encountered.
1648 MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
1649 }
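// At this point MFI->getSGPRForEXECCopy() is one of: a free scratch SGPR, a
// register backed by a prolog/epilog spill entry, or null when no whole-wave
// copy or spill was found.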
1650
1651 // hasFP only knows about stack objects that already exist. We're now
1652 // determining the stack slots that will be created, so we have to predict
1653 // them. Stack objects force FP usage with calls.
1654 //
1655 // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1656 // don't want to report it here.
1657 //
1658 // FIXME: Is this really hasReservedCallFrame?
1659 const bool WillHaveFP =
1660 FrameInfo.hasCalls() &&
1661 (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1662
1663 if (WillHaveFP || hasFP(MF)) {
1664 Register FramePtrReg = MFI->getFrameOffsetReg();
1665 assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
1666 "Re-reserving spill slot for FP");
1667 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
1668 }
1669
1670 if (TRI->hasBasePointer(MF)) {
1671 Register BasePtrReg = TRI->getBaseRegister();
1672 assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
1673 "Re-reserving spill slot for BP");
1674 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
1675 }
1676}
1677
1678 // Only report VGPRs to generic code.
1679 void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
1680 BitVector &SavedVGPRs,
1681 RegScavenger *RS) const {
1682 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1683
1684 // If this is a function with the amdgpu_cs_chain[_preserve] calling
1685 // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
1686 // we don't need to save and restore anything.
1687 if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
1688 return;
1689
1690 TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
1691
1692 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1693 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1694 const SIInstrInfo *TII = ST.getInstrInfo();
1695 bool NeedExecCopyReservedReg = false;
1696
1697 MachineInstr *ReturnMI = nullptr;
1698 for (MachineBasicBlock &MBB : MF) {
1699 for (MachineInstr &MI : MBB) {
1700 // TODO: Walking through all MBBs here would be a bad heuristic. Better
1701 // handle them elsewhere.
1702 if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
1703 NeedExecCopyReservedReg = true;
1704 else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
1705 MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
1706 MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
1707 (MFI->isChainFunction() &&
1708 TII->isChainCallOpcode(MI.getOpcode()))) {
1709 // We expect all returns to have the same number of register operands.
1710 assert(!ReturnMI ||
1711 (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
1712 count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
1713 ReturnMI = &MI;
1714 }
1715 }
1716 }
1717
1718 SmallVector<Register> SortedWWMVGPRs;
1719 for (Register Reg : MFI->getWWMReservedRegs()) {
1720 // The shift-back is needed only for the VGPRs used for SGPR spills, which
1721 // are 32 bits wide. The SIPreAllocateWWMRegs pass can also add wider tuples
1722 // to the WWM reserved registers; skip those here.
1723 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1724 if (TRI->getRegSizeInBits(*RC) != 32)
1725 continue;
1726 SortedWWMVGPRs.push_back(Reg);
1727 }
1728
1729 sort(SortedWWMVGPRs, std::greater<Register>());
1730 MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);
1731
1732 if (MFI->isEntryFunction())
1733 return;
1734
1735 if (MFI->isWholeWaveFunction()) {
1736 // In practice, all the VGPRs are WWM registers, and we will need to save at
1737 // least their inactive lanes. Add them to WWMReservedRegs.
1738 assert(!NeedExecCopyReservedReg &&
1739 "Whole wave functions can use the reg mapped for their i1 argument");
1740
1741 // FIXME: Be more efficient!
1742 unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;
1743 for (MCRegister Reg :
1744 AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
1745 if (MF.getRegInfo().isPhysRegModified(Reg)) {
1746 MFI->reserveWWMRegister(Reg);
1747 MF.begin()->addLiveIn(Reg);
1748 }
1749 MF.begin()->sortUniqueLiveIns();
1750 }
1751
1752 // Remove any VGPRs used in the return value, because these do not need to
1753 // be saved. This prevents CSR restore from clobbering return VGPRs.
1754 if (ReturnMI) {
1755 for (auto &Op : ReturnMI->operands()) {
1756 if (Op.isReg())
1757 SavedVGPRs.reset(Op.getReg());
1758 }
1759 }
1760
1761 // Create the stack objects for WWM registers now.
1762 for (Register Reg : MFI->getWWMReservedRegs()) {
1763 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1764 MFI->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
1765 TRI->getSpillAlign(*RC));
1766 }
1767
1768 // Ignore the SGPRs the default implementation found.
1769 SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1770
1771 // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
1772 // gfx908 has no direct AGPR loads and stores, so spilling an AGPR also
1773 // requires a temporary VGPR.
1774 if (!ST.hasGFX90AInsts())
1775 SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1776
1777 determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);
1778
1779 // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
1780 // allow the default insertion to handle them.
1781 for (auto &Reg : MFI->getWWMSpills())
1782 SavedVGPRs.reset(Reg.first);
1783}
1784
1785 void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
1786 BitVector &SavedRegs,
1787 RegScavenger *RS) const {
1788 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1789
1790 if (MFI->isEntryFunction())
1791 return;
1792
1793 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1794 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1795
1796 // The SP is specifically managed and we don't want extra spills of it.
1797 SavedRegs.reset(MFI->getStackPtrOffsetReg());
1798
1799 const BitVector AllSavedRegs = SavedRegs;
1800 SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1801
1802 // We have to anticipate introducing CSR VGPR spills, or a spill of the
1803 // caller-saved VGPR reserved for SGPR spills, as we now always create a
1804 // stack entry for it even if there are no other stack objects, since we
1805 // require an FP whenever there is a call and a stack. We will allocate a
1806 // VGPR for SGPR spills if there are any, whether they are CSR spills or not.
1807 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1808 const bool WillHaveFP =
1809 FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
1810
1811 // FP will be specially managed like SP.
1812 if (WillHaveFP || hasFP(MF))
1813 SavedRegs.reset(MFI->getFrameOffsetReg());
1814
1815 // The return address use in the return instruction is hidden by the
1816 // SI_RETURN pseudo. Given that, and since IPRA computes actual register
1817 // usage rather than using the CSR list, clobbering of the return address
1818 // by function calls (D117243) or otherwise (D120922) is not seen by IPRA's
1819 // register usage collection. The check below ensures the return address
1820 // (a 64-bit SGPR pair) is saved and restored in those scenarios.
1821 const MachineRegisterInfo &MRI = MF.getRegInfo();
1822 Register RetAddrReg = TRI->getReturnAddressReg(MF);
1823 if (!MFI->isEntryFunction() &&
1824 (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
1825 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
1826 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
1827 }
1828}
1829
1830 static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
1831 const GCNSubtarget &ST,
1832 std::vector<CalleeSavedInfo> &CSI,
1833 unsigned &MinCSFrameIndex,
1834 unsigned &MaxCSFrameIndex) {
1835 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1836 MachineFrameInfo &MFI = MF.getFrameInfo();
1837 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1838
1839 assert(
1840 llvm::is_sorted(CSI,
1841 [](const CalleeSavedInfo &A, const CalleeSavedInfo &B) {
1842 return A.getReg() < B.getReg();
1843 }) &&
1844 "Callee saved registers not sorted");
1845
1846 auto CanUseBlockOps = [&](const CalleeSavedInfo &CSI) {
1847 return !CSI.isSpilledToReg() &&
1848 TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass &&
1849 !FuncInfo->isWWMReservedRegister(CSI.getReg());
1850 };
1851
1852 auto CSEnd = CSI.end();
1853 for (auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
1854 Register Reg = CSIt->getReg();
1855 if (!CanUseBlockOps(*CSIt))
1856 continue;
1857
1858 // Find all the regs that will fit in a 32-bit mask starting at the current
1859 // reg and build said mask. It should have 1 for every register that's
1860 // included, with the current register as the least significant bit.
1861 uint32_t Mask = 1;
1862 CSEnd = std::remove_if(
1863 CSIt + 1, CSEnd, [&](const CalleeSavedInfo &CSI) -> bool {
1864 if (CanUseBlockOps(CSI) && CSI.getReg() < Reg + 32) {
1865 Mask |= 1 << (CSI.getReg() - Reg);
1866 return true;
1867 } else {
1868 return false;
1869 }
1870 });
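// Worked example (hypothetical registers): with callee saves v40, v42 and
// v43, the block starts at Reg = v40; the remove_if above folds v42 and v43
// into it, leaving Mask = 0b1101 (bit 0 = v40, bit 2 = v42, bit 3 = v43).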
1871
1872 const TargetRegisterClass *BlockRegClass = TRI->getRegClassForBlockOp(MF);
1873 Register RegBlock =
1874 TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
1875 if (!RegBlock) {
1876 // We couldn't find a super register for the block. This can happen if
1877 // the register we started with is too high (e.g. v232 if the maximum is
1878 // v255). We therefore try to get the last register block and figure out
1879 // the mask from there.
1880 Register LastBlockStart =
1881 AMDGPU::VGPR0 + alignDown(Reg - AMDGPU::VGPR0, 32);
1882 RegBlock =
1883 TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
1884 assert(RegBlock && TRI->isSubRegister(RegBlock, Reg) &&
1885 "Couldn't find super register");
1886 int RegDelta = Reg - LastBlockStart;
1887 assert(RegDelta > 0 && llvm::countl_zero(Mask) >= RegDelta &&
1888 "Bad shift amount");
1889 Mask <<= RegDelta;
1890 }
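// E.g. (hypothetical): for Reg = v250 with 32-register blocks, LastBlockStart
// is v224, RegDelta is 26, and the mask built above is shifted into the top
// lanes of the v224 block.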
1891
1892 FuncInfo->setMaskForVGPRBlockOps(RegBlock, Mask);
1893
1894 // The stack object can be a bit smaller than the register block if we know
1895 // some of the high bits of Mask are 0. This often happens with calling
1896 // conventions where the caller-saved and callee-saved VGPRs are interleaved
1897 // at a small boundary (e.g. 8 or 16).
1898 int UnusedBits = llvm::countl_zero(Mask);
1899 unsigned BlockSize = TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
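// E.g. if the top 24 bits of Mask are clear, a 1024-bit (128-byte) block
// class only needs a 128 - 24 * 4 = 32 byte stack object.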
1900 int FrameIdx =
1901 MFI.CreateStackObject(BlockSize, TRI->getSpillAlign(*BlockRegClass),
1902 /*isSpillSlot=*/true);
1903 if ((unsigned)FrameIdx < MinCSFrameIndex)
1904 MinCSFrameIndex = FrameIdx;
1905 if ((unsigned)FrameIdx > MaxCSFrameIndex)
1906 MaxCSFrameIndex = FrameIdx;
1907
1908 CSIt->setFrameIdx(FrameIdx);
1909 CSIt->setReg(RegBlock);
1910 }
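// Erase-remove idiom: drop the CSI entries that were folded into blocks
// above, leaving only the rewritten block entries.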
1911 CSI.erase(CSEnd, CSI.end());
1912}
1913
1914 bool SIFrameLowering::assignCalleeSavedSpillSlots(
1915 MachineFunction &MF, const TargetRegisterInfo *TRI,
1916 std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
1917 unsigned &MaxCSFrameIndex) const {
1918 if (CSI.empty())
1919 return true; // Early exit if no callee saved registers are modified!
1920
1921 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1922 bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();
1923
1924 if (UseVGPRBlocks)
1925 assignSlotsUsingVGPRBlocks(MF, ST, CSI, MinCSFrameIndex, MaxCSFrameIndex);
1926
1927 return assignCalleeSavedSpillSlots(MF, TRI, CSI) || UseVGPRBlocks;
1928}
1929
1930 bool SIFrameLowering::assignCalleeSavedSpillSlots(
1931 MachineFunction &MF, const TargetRegisterInfo *TRI,
1932 std::vector<CalleeSavedInfo> &CSI) const {
1933 if (CSI.empty())
1934 return true; // Early exit if no callee saved registers are modified!
1935
1936 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1937 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1938 const SIRegisterInfo *RI = ST.getRegisterInfo();
1939 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1940 Register BasePtrReg = RI->getBaseRegister();
1941 Register SGPRForFPSaveRestoreCopy =
1942 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1943 Register SGPRForBPSaveRestoreCopy =
1944 FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
1945 if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1946 return false;
1947
1948 unsigned NumModifiedRegs = 0;
1949
1950 if (SGPRForFPSaveRestoreCopy)
1951 NumModifiedRegs++;
1952 if (SGPRForBPSaveRestoreCopy)
1953 NumModifiedRegs++;
1954
1955 for (auto &CS : CSI) {
1956 if (CS.getReg() == FramePtrReg.asMCReg() && SGPRForFPSaveRestoreCopy) {
1957 CS.setDstReg(SGPRForFPSaveRestoreCopy);
1958 if (--NumModifiedRegs)
1959 break;
1960 } else if (CS.getReg() == BasePtrReg.asMCReg() &&
1961 SGPRForBPSaveRestoreCopy) {
1962 CS.setDstReg(SGPRForBPSaveRestoreCopy);
1963 if (--NumModifiedRegs)
1964 break;
1965 }
1966 }
1967
1968 return false;
1969}
1970
1971 bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
1972 const MachineFunction &MF) const {
1973
1974 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1975 const MachineFrameInfo &MFI = MF.getFrameInfo();
1976 const SIInstrInfo *TII = ST.getInstrInfo();
1977 uint64_t EstStackSize = MFI.estimateStackSize(MF);
1978 uint64_t MaxOffset = EstStackSize - 1;
1979
1980 // We need the emergency stack slots to be allocated in range of the
1981 // MUBUF/flat scratch immediate offset from the base register, so assign these
1982 // first at the incoming SP position.
1983 //
1984 // TODO: We could try sorting the objects to find a hole in the first bytes
1985 // rather than allocating as close as possible. This could save a lot of space
1986 // on frames with alignment requirements.
1987 if (ST.enableFlatScratch()) {
1988 if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1989 SIInstrFlags::FlatScratch))
1990 return false;
1991 } else {
1992 if (TII->isLegalMUBUFImmOffset(MaxOffset))
1993 return false;
1994 }
1995
1996 return true;
1997}
1998
1999 bool SIFrameLowering::spillCalleeSavedRegisters(
2000 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2001 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2002 MachineFunction *MF = MBB.getParent();
2003 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2004 if (!ST.useVGPRBlockOpsForCSR())
2005 return false;
2006
2007 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2008 SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
2009 const SIInstrInfo *TII = ST.getInstrInfo();
2011
2012 const TargetRegisterClass *BlockRegClass =
2013 static_cast<const SIRegisterInfo *>(TRI)->getRegClassForBlockOp(*MF);
2014 for (const CalleeSavedInfo &CS : CSI) {
2015 Register Reg = CS.getReg();
2016 if (!BlockRegClass->contains(Reg) ||
2017 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2018 spillCalleeSavedRegister(MBB, MI, CS, TII, TRI);
2019 continue;
2020 }
2021
2022 // Build a scratch block store.
2023 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2024 int FrameIndex = CS.getFrameIdx();
2025 MachinePointerInfo PtrInfo =
2026 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2027 MachineMemOperand *MMO =
2028 MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
2029 FrameInfo.getObjectSize(FrameIndex),
2030 FrameInfo.getObjectAlign(FrameIndex));
2031
2032 BuildMI(MBB, MI, MI->getDebugLoc(),
2033 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
2034 .addReg(Reg, getKillRegState(false))
2035 .addFrameIndex(FrameIndex)
2036 .addReg(FuncInfo->getStackPtrOffsetReg())
2037 .addImm(0)
2038 .addImm(Mask)
2039 .addMemOperand(MMO);
2040
2041 FuncInfo->setHasSpilledVGPRs();
2042
2043 // Add the register to the liveins. This is necessary because if any of the
2044 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2045 // then the whole block will be marked as reserved and `updateLiveness` will
2046 // skip it.
2047 MBB.addLiveIn(Reg);
2048 }
2049 MBB.sortUniqueLiveIns();
2050
2051 return true;
2052}
2053
2054 bool SIFrameLowering::restoreCalleeSavedRegisters(
2055 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2056 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2057 MachineFunction *MF = MBB.getParent();
2058 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2059 if (!ST.useVGPRBlockOpsForCSR())
2060 return false;
2061
2062 SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
2063 MachineFrameInfo &MFI = MF->getFrameInfo();
2064 const SIInstrInfo *TII = ST.getInstrInfo();
2065 const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
2066 const TargetRegisterClass *BlockRegClass = SITRI->getRegClassForBlockOp(*MF);
2067 for (const CalleeSavedInfo &CS : reverse(CSI)) {
2068 Register Reg = CS.getReg();
2069 if (!BlockRegClass->contains(Reg) ||
2070 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2071 restoreCalleeSavedRegister(MBB, MI, CS, TII, TRI);
2072 continue;
2073 }
2074
2075 // Build a scratch block load.
2076 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2077 int FrameIndex = CS.getFrameIdx();
2078 MachinePointerInfo PtrInfo =
2079 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2080 MachineMemOperand *MMO = MF->getMachineMemOperand(
2081 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
2082 MFI.getObjectAlign(FrameIndex));
2083
2084 auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
2085 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
2086 .addFrameIndex(FrameIndex)
2087 .addReg(FuncInfo->getStackPtrOffsetReg())
2088 .addImm(0)
2089 .addImm(Mask)
2090 .addMemOperand(MMO);
2091 SITRI->addImplicitUsesForBlockCSRLoad(MIB, Reg);
2092
2093 // Add the register to the liveins. This is necessary because if any of the
2094 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2095 // then the whole block will be marked as reserved and `updateLiveness` will
2096 // skip it.
2097 MBB.addLiveIn(Reg);
2098 }
2099
2100 MBB.sortUniqueLiveIns();
2101 return true;
2102}
2103
2104 MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
2105 MachineFunction &MF,
2106 MachineBasicBlock &MBB,
2107 MachineBasicBlock::iterator I) const {
2108 int64_t Amount = I->getOperand(0).getImm();
2109 if (Amount == 0)
2110 return MBB.erase(I);
2111
2112 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2113 const SIInstrInfo *TII = ST.getInstrInfo();
2114 const DebugLoc &DL = I->getDebugLoc();
2115 unsigned Opc = I->getOpcode();
2116 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
2117 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2118
2119 if (!hasReservedCallFrame(MF)) {
2120 Amount = alignTo(Amount, getStackAlign());
2121 assert(isUInt<32>(Amount) && "exceeded stack address space size");
2122 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
2123 Register SPReg = MFI->getStackPtrOffsetReg();
2124
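// Scale the byte amount into SP units: getScratchScaleFactor (defined earlier
// in this file) accounts for the per-lane swizzling of non-flat scratch, where
// the SP is scaled by the wavefront size; for flat scratch it is 1.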
2125 Amount *= getScratchScaleFactor(ST);
2126 if (IsDestroy)
2127 Amount = -Amount;
2128 auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
2129 .addReg(SPReg)
2130 .addImm(Amount);
2131 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
2132 } else if (CalleePopAmount != 0) {
2133 llvm_unreachable("is this used?");
2134 }
2135
2136 return MBB.erase(I);
2137}
2138
2139/// Returns true if the frame will require a reference to the stack pointer.
2140///
2141/// This is the set of conditions common to setting up the stack pointer in a
2142/// kernel, and for using a frame pointer in a callable function.
2143///
2144/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
2145/// references SP.
2147 return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
2148}
2149
2150 // The FP for kernels is always known to be 0, so we never really need to set
2151 // up an explicit register for it. However, DisableFramePointerElim will force
2152 // us to use a register for it.
2153 bool SIFrameLowering::hasFPImpl(const MachineFunction &MF) const {
2154 const MachineFrameInfo &MFI = MF.getFrameInfo();
2155
2156 // For entry & chain functions we can use an immediate offset in most cases,
2157 // so the presence of calls doesn't imply we need a distinct frame pointer.
2158 if (MFI.hasCalls() &&
2159 !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
2160 !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) {
2161 // All offsets are unsigned, so they need to be addressed in the same direction
2162 // as stack growth.
2163
2164 // FIXME: This function is pretty broken, since it can be called before the
2165 // frame layout is determined or CSR spills are inserted.
2166 return MFI.getStackSize() != 0;
2167 }
2168
2169 return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
2170 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
2171 MF) ||
2172 mayReserveScratchForCWSR(MF) ||
2173 MF.getTarget().Options.DisableFramePointerElim(MF);
2174}
2175
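// CWSR (compute wave save/restore) may require reserved scratch when dynamic
// VGPRs are enabled; per the checks below, this only applies to compute entry
// points.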
2176 bool SIFrameLowering::mayReserveScratchForCWSR(
2177 const MachineFunction &MF) const {
2178 return MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() &&
2179 AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) &&
2180 AMDGPU::isCompute(MF.getFunction().getCallingConv());
2181}
2182
2183 // This is essentially a reduced version of hasFP for entry functions. Since
2184 // the stack pointer is known to be 0 on entry to kernels, we never really
2185 // need an FP register. We may need to initialize the stack pointer depending
2186 // on the frame properties, which logically overlap many of the cases where
2187 // an ordinary function would require an FP.
2188 // Also used for chain functions. While not technically entry functions, chain
2189 // functions may need to set up a stack pointer in some situations.
2190 bool SIFrameLowering::requiresStackPointerReference(
2191 const MachineFunction &MF) const {
2192 // Callable functions always require a stack pointer reference.
2195 "only expected to call this for entry points and chain functions");
2196
2197 const MachineFrameInfo &MFI = MF.getFrameInfo();
2198
2199 // Entry points ordinarily don't need to initialize SP. We have to set it up
2200 // for callees if there are any. Also note that tail calls are impossible and
2201 // make no sense for kernels.
2202 if (MFI.hasCalls())
2203 return true;
2204
2205 // We still need to initialize the SP if we're doing anything weird that
2206 // references the SP, like variable sized stack objects.
2207 return frameTriviallyRequiresSP(MFI);
2208}