LLVM  12.0.0git
SIFrameLowering.cpp
Go to the documentation of this file.
1 //===----------------------- SIFrameLowering.cpp --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 
9 #include "SIFrameLowering.h"
10 #include "AMDGPUSubtarget.h"
11 #include "SIInstrInfo.h"
12 #include "SIMachineFunctionInfo.h"
13 #include "SIRegisterInfo.h"
15 
21 
22 using namespace llvm;
23 
24 #define DEBUG_TYPE "frame-info"
25 
26 
27 // Find a scratch register that we can use at the start of the prologue to
28 // re-align the stack pointer. We avoid using callee-save registers since they
29 // may appear to be free when this is called from canUseAsPrologue (during
30 // shrink wrapping), but then no longer be free when this is called from
31 // emitPrologue.
32 //
33 // FIXME: This is a bit conservative, since in the above case we could use one
34 // of the callee-save registers as a scratch temp to re-align the stack pointer,
35 // but we would then have to make sure that we were in fact saving at least one
36 // callee-save register in the prologue, which is additional complexity that
37 // doesn't seem worth the benefit.
39  LivePhysRegs &LiveRegs,
40  const TargetRegisterClass &RC,
41  bool Unused = false) {
42  // Mark callee saved registers as used so we will not choose them.
43  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
44  for (unsigned i = 0; CSRegs[i]; ++i)
45  LiveRegs.addReg(CSRegs[i]);
46 
47  if (Unused) {
48  // We are looking for a register that can be used throughout the entire
49  // function, so any use is unacceptable.
50  for (MCRegister Reg : RC) {
51  if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
52  return Reg;
53  }
54  } else {
55  for (MCRegister Reg : RC) {
56  if (LiveRegs.available(MRI, Reg))
57  return Reg;
58  }
59  }
60 
61  // If we require an unused register, this is used in contexts where failure is
62  // an option and has an alternative plan. In other contexts, this must
63  // succeed0.
64  if (!Unused)
65  report_fatal_error("failed to find free scratch register");
66 
67  return MCRegister();
68 }
69 
// Choose how the current FP or BP will be preserved across this function.
// Preference order visible in the code below: (1) a free lane in an existing
// SGPR-to-VGPR spill VGPR, (2) an unused SGPR (returned through TempSGPR),
// (3) a newly allocated SGPR-to-VGPR spill slot, (4) a plain memory spill
// stack object. The frame-index paths report the chosen slot via FrameIndex.
//
// NOTE(review): this scraped text is missing the function's signature line
// and several statement fragments (the stack-ID argument of the
// CreateStackObject calls and the declarations of MFI/ST/FrameIndex among
// them) — restore them from upstream LLVM before compiling.
71  LivePhysRegs &LiveRegs,
72  Register &TempSGPR,
74  bool IsFP) {
76  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
77 
78 #ifndef NDEBUG
80  const SIRegisterInfo *TRI = ST.getRegisterInfo();
81 #endif
82 
83  // We need to save and restore the current FP/BP.
84 
85  // 1: If there is already a VGPR with free lanes, use it. We
86  // may already have to pay the penalty for spilling a CSR VGPR.
87  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
88  int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
90 
91  if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
92  llvm_unreachable("allocate SGPR spill should have worked");
93 
94  FrameIndex = NewFI;
95 
96  LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
97  dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
98  << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
99  << '\n');
100  return;
101  }
102 
103  // 2: Next, try to save the FP/BP in an unused SGPR.
105  MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
106 
107  if (!TempSGPR) {
108  int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
110 
111  if (MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
112  // 3: There's no free lane to spill, and no free register to save FP/BP,
113  // so we're forced to spill another VGPR to use for the spill.
114  FrameIndex = NewFI;
115  } else {
116  // 4: If all else fails, spill the FP/BP to memory.
117  FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
118  }
119 
120  LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
121  dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
122  << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
123  << '\n';);
124  } else {
125  LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
126  << printReg(TempSGPR, TRI) << '\n');
127  }
128 }
129 
130 // We need to specially emit stack operations here because a different frame
131 // register is used than in the rest of the function, as getFrameRegister would
132 // use.
// Store SpillReg into stack slot FI through the scratch buffer descriptor
// (ScratchRsrcReg) relative to SPReg. If the slot's byte offset fits the
// 12-bit BUFFER_STORE immediate, a single _OFFSET-form store is emitted;
// otherwise the offset is materialized into a scratch VGPR and the
// register-offset (_OFFEN) form is used.
//
// NOTE(review): the scrape drops the signature line and the
// MachineMemOperand creation preceding `MFI.getObjectAlign(FI)` — restore
// from upstream LLVM before compiling.
135  const SIInstrInfo *TII, Register SpillReg,
136  Register ScratchRsrcReg, Register SPReg, int FI) {
137  MachineFunction *MF = MBB.getParent();
138  MachineFrameInfo &MFI = MF->getFrameInfo();
139 
140  int64_t Offset = MFI.getObjectOffset(FI);
141 
144  MFI.getObjectAlign(FI));
145 
146  if (isUInt<12>(Offset)) {
147  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
148  .addReg(SpillReg, RegState::Kill)
149  .addReg(ScratchRsrcReg)
150  .addReg(SPReg)
151  .addImm(Offset)
152  .addImm(0) // glc
153  .addImm(0) // slc
154  .addImm(0) // tfe
155  .addImm(0) // dlc
156  .addImm(0) // swz
157  .addMemOperand(MMO);
158  return;
159  }
160 
// Offset does not fit the immediate field: place it in a free VGPR and use
// the OFFEN form of the store instead.
162  MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
163 
164  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
165  .addImm(Offset);
166 
167  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
168  .addReg(SpillReg, RegState::Kill)
169  .addReg(OffsetReg, RegState::Kill)
170  .addReg(ScratchRsrcReg)
171  .addReg(SPReg)
172  .addImm(0)
173  .addImm(0) // glc
174  .addImm(0) // slc
175  .addImm(0) // tfe
176  .addImm(0) // dlc
177  .addImm(0) // swz
178  .addMemOperand(MMO);
179 }
180 
// Counterpart of buildPrologSpill: reload SpillReg from stack slot FI
// through the scratch buffer descriptor (ScratchRsrcReg) relative to SPReg.
// Uses the 12-bit-immediate _OFFSET load when the slot offset fits, else
// materializes the offset in a scratch VGPR and uses the _OFFEN form.
//
// NOTE(review): the scrape drops the signature line and the
// MachineMemOperand creation preceding `MFI.getObjectAlign(FI)` — restore
// from upstream LLVM before compiling.
183  const SIInstrInfo *TII, Register SpillReg,
184  Register ScratchRsrcReg, Register SPReg, int FI) {
185  MachineFunction *MF = MBB.getParent();
186  MachineFrameInfo &MFI = MF->getFrameInfo();
187  int64_t Offset = MFI.getObjectOffset(FI);
188 
191  MFI.getObjectAlign(FI));
192 
193  if (isUInt<12>(Offset)) {
194  BuildMI(MBB, I, DebugLoc(),
195  TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
196  .addReg(ScratchRsrcReg)
197  .addReg(SPReg)
198  .addImm(Offset)
199  .addImm(0) // glc
200  .addImm(0) // slc
201  .addImm(0) // tfe
202  .addImm(0) // dlc
203  .addImm(0) // swz
204  .addMemOperand(MMO);
205  return;
206  }
207 
209  MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
210 
211  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
212  .addImm(Offset);
213 
214  BuildMI(MBB, I, DebugLoc(),
215  TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
216  .addReg(OffsetReg, RegState::Kill)
217  .addReg(ScratchRsrcReg)
218  .addReg(SPReg)
219  .addImm(0)
220  .addImm(0) // glc
221  .addImm(0) // slc
222  .addImm(0) // tfe
223  .addImm(0) // dlc
224  .addImm(0) // swz
225  .addMemOperand(MMO);
226 }
227 
228 // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
// Initializes the flat-scratch base from the preloaded flat-scratch-init
// SGPR pair plus the per-wave scratch offset. On subtargets where flat
// scratch is a pointer it performs a 64-bit add (writing FLAT_SCR directly,
// or via S_SETREG on the path that uses hardware registers); otherwise it
// copies the size and converts the byte offset to 256-byte units.
//
// NOTE(review): this scrape is missing several lines (the MBB/I parameters,
// the MFI/MRI declarations, the getPreloadedReg argument, the
// S_SETREG width/offset immediate operands, and the branch guard before the
// non-pointer path) — restore from upstream LLVM before compiling.
229 void SIFrameLowering::emitEntryFunctionFlatScratchInit(
231  const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
232  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
233  const SIInstrInfo *TII = ST.getInstrInfo();
234  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
236 
237  // We don't need this if we only have spills since there is no user facing
238  // scratch.
239 
240  // TODO: If we know we don't have flat instructions earlier, we can omit
241  // this from the input registers.
242  //
243  // TODO: We only need to know if we access scratch space through a flat
244  // pointer. Because we only detect if flat instructions are used at all,
245  // this will be used more often than necessary on VI.
246 
247  Register FlatScratchInitReg =
249 
251  MRI.addLiveIn(FlatScratchInitReg);
252  MBB.addLiveIn(FlatScratchInitReg);
253 
254  Register FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
255  Register FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
256 
257  // Do a 64-bit pointer add.
258  if (ST.flatScratchIsPointer()) {
260  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
261  .addReg(FlatScrInitLo)
262  .addReg(ScratchWaveOffsetReg);
263  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
264  .addReg(FlatScrInitHi)
265  .addImm(0);
266  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
267  addReg(FlatScrInitLo).
268  addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
270  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
271  addReg(FlatScrInitHi).
272  addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
274  return;
275  }
276 
277  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
278  .addReg(FlatScrInitLo)
279  .addReg(ScratchWaveOffsetReg);
280  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
281  .addReg(FlatScrInitHi)
282  .addImm(0);
283 
284  return;
285  }
286 
288 
289  // Copy the size in bytes.
290  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
291  .addReg(FlatScrInitHi, RegState::Kill);
292 
293  // Add wave offset in bytes to private base offset.
294  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
295  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
296  .addReg(FlatScrInitLo)
297  .addReg(ScratchWaveOffsetReg);
298 
299  // Convert offset to 256-byte units.
300  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
301  .addReg(FlatScrInitLo, RegState::Kill)
302  .addImm(8);
303 }
304 
305 // Shift down registers reserved for the scratch RSRC.
// Returns the (possibly relocated) scratch RSRC register for an entry
// function, or an invalid Register() when the RSRC is unset or unused. When
// the reserved register is the default private-segment-buffer reservation
// and the subtarget has no SGPR-init bug, the descriptor is shifted down to
// the first free SGPR128 past the preloaded SGPRs (avoiding the GIT pointer
// register), and the function info is updated via setScratchRSrcReg.
//
// NOTE(review): the scrape drops the declarations of MFI and MRI used
// throughout this body — restore from upstream LLVM before compiling.
306 Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
307  MachineFunction &MF) const {
308 
309  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
310  const SIInstrInfo *TII = ST.getInstrInfo();
311  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
314 
315  assert(MFI->isEntryFunction());
316 
317  Register ScratchRsrcReg = MFI->getScratchRSrcReg();
318 
319  if (!ScratchRsrcReg || !MRI.isPhysRegUsed(ScratchRsrcReg))
320  return Register();
321 
322  if (ST.hasSGPRInitBug() ||
323  ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
324  return ScratchRsrcReg;
325 
326  // We reserved the last registers for this. Shift it down to the end of those
327  // which were actually used.
328  //
329  // FIXME: It might be safer to use a pseudoregister before replacement.
330 
331  // FIXME: We should be able to eliminate unused input registers. We only
332  // cannot do this for the resources required for scratch access. For now we
333  // skip over user SGPRs and may leave unused holes.
334 
335  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
336  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
337  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
338 
339  // Skip the last N reserved elements because they should have already been
340  // reserved for VCC etc.
341  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
342  for (MCPhysReg Reg : AllSGPR128s) {
343  // Pick the first unallocated one. Make sure we don't clobber the other
344  // reserved input we needed. Also for PAL, make sure we don't clobber
345  // the GIT pointer passed in SGPR0 or SGPR8.
346  if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
347  !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
348  MRI.replaceRegWith(ScratchRsrcReg, Reg);
349  MFI->setScratchRSrcReg(Reg);
350  return Reg;
351  }
352  }
353 
354  return ScratchRsrcReg;
355 }
356 
// Prologue emission for entry (kernel) functions: relocates the reserved
// scratch RSRC, locates the preloaded scratch wave offset (copying it to a
// free SGPR when the RSRC would clobber it), initializes SP/FP, and emits
// the flat-scratch and scratch-RSRC setup sequences.
//
// NOTE(review): the scrape drops the signature's opening line (presumably
// `void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,` —
// confirm against upstream), the MFI/MRI declarations, the insertion
// iterator `I`, and the argument of the first getPreloadedReg call below —
// restore from upstream LLVM before compiling.
358  MachineBasicBlock &MBB) const {
359  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
360 
361  // FIXME: If we only have SGPR spills, we won't actually be using scratch
362  // memory since these spill to VGPRs. We should be cleaning up these unused
363  // SGPR spill frame indices somewhere.
364 
365  // FIXME: We still have implicit uses on SGPR spill instructions in case they
366  // need to spill to vector memory. It's likely that will not happen, but at
367  // this point it appears we need the setup. This part of the prolog should be
368  // emitted after frame indices are eliminated.
369 
370  // FIXME: Remove all of the isPhysRegUsed checks
371 
373  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
374  const SIInstrInfo *TII = ST.getInstrInfo();
375  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
377  const Function &F = MF.getFunction();
378 
379  assert(MFI->isEntryFunction());
380 
381  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
383  // FIXME: Hack to not crash in situations which emitted an error.
384  if (!PreloadedScratchWaveOffsetReg)
385  return;
386 
387  // We need to do the replacement of the private segment buffer register even
388  // if there are no stack objects. There could be stores to undef or a
389  // constant without an associated object.
390  //
391  // This will return `Register()` in cases where there are no actual
392  // uses of the SRSRC.
393  Register ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
394 
395  // Make the selected register live throughout the function.
396  if (ScratchRsrcReg) {
397  for (MachineBasicBlock &OtherBB : MF) {
398  if (&OtherBB != &MBB) {
399  OtherBB.addLiveIn(ScratchRsrcReg);
400  }
401  }
402  }
403 
404  // Now that we have fixed the reserved SRSRC we need to locate the
405  // (potentially) preloaded SRSRC.
406  Register PreloadedScratchRsrcReg;
407  if (ST.isAmdHsaOrMesa(F)) {
408  PreloadedScratchRsrcReg =
409  MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
410  if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
411  // We added live-ins during argument lowering, but since they were not
412  // used they were deleted. We're adding the uses now, so add them back.
413  MRI.addLiveIn(PreloadedScratchRsrcReg);
414  MBB.addLiveIn(PreloadedScratchRsrcReg);
415  }
416  }
417 
418  // Debug location must be unknown since the first debug location is used to
419  // determine the end of the prologue.
420  DebugLoc DL;
422 
423  // We found the SRSRC first because it needs four registers and has an
424  // alignment requirement. If the SRSRC that we found is clobbering with
425  // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
426  // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
427  // wave offset to a free SGPR.
428  Register ScratchWaveOffsetReg;
429  if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
430  ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
431  unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
432  AllSGPRs = AllSGPRs.slice(
433  std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
434  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
435  for (MCPhysReg Reg : AllSGPRs) {
436  if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
437  !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
438  ScratchWaveOffsetReg = Reg;
439  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
440  .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
441  break;
442  }
443  }
444  } else {
445  ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
446  }
447  assert(ScratchWaveOffsetReg);
448 
449  if (MF.getFrameInfo().hasCalls()) {
450  Register SPReg = MFI->getStackPtrOffsetReg();
451  assert(SPReg != AMDGPU::SP_REG);
452  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
453  .addImm(MF.getFrameInfo().getStackSize() * ST.getWavefrontSize());
454  }
455 
456  if (hasFP(MF)) {
457  Register FPReg = MFI->getFrameOffsetReg();
458  assert(FPReg != AMDGPU::FP_REG);
459  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
460  }
461 
462  if (MFI->hasFlatScratchInit() || ScratchRsrcReg) {
463  MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
464  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
465  }
466 
467  if (MFI->hasFlatScratchInit()) {
468  emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
469  }
470 
471  if (ScratchRsrcReg) {
472  emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
473  PreloadedScratchRsrcReg,
474  ScratchRsrcReg, ScratchWaveOffsetReg);
475  }
476 }
477 
478 // Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
// Builds the 128-bit scratch resource descriptor in ScratchRsrcReg. Three
// paths are visible below: (a) amdpal — form a GIT pointer and load the
// descriptor with S_LOAD_DWORDX4; (b) Mesa GFX shaders / no preloaded RSRC —
// construct words 0-1 from the implicit buffer pointer or relocations and
// words 2-3 from getScratchRsrcWords23(); (c) HSA/Mesa compute — copy the
// preloaded descriptor. Finally the scratch wave offset is added into the
// 48-bit base address held in sub0/sub1.
//
// NOTE(review): multiple lines are missing from this scrape (the MF/MBB/I
// parameters, the MFI declaration, the MachinePointerInfo definitions, the
// MachineMemOperand flag arguments, and several .addReg/.addImm operands) —
// restore from upstream LLVM before compiling.
479 void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
481  const DebugLoc &DL, Register PreloadedScratchRsrcReg,
482  Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
483 
484  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
485  const SIInstrInfo *TII = ST.getInstrInfo();
486  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
488  const Function &Fn = MF.getFunction();
489 
490  if (ST.isAmdPalOS()) {
491  // The pointer to the GIT is formed from the offset passed in and either
492  // the amdgpu-git-ptr-high function attribute or the top part of the PC
493  Register RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
494  Register RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
495  Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
496 
497  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
498 
499  if (MFI->getGITPtrHigh() != 0xffffffff) {
500  BuildMI(MBB, I, DL, SMovB32, RsrcHi)
501  .addImm(MFI->getGITPtrHigh())
502  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
503  } else {
504  const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
505  BuildMI(MBB, I, DL, GetPC64, Rsrc01);
506  }
507  Register GitPtrLo = MFI->getGITPtrLoReg(MF);
508  MF.getRegInfo().addLiveIn(GitPtrLo);
509  MBB.addLiveIn(GitPtrLo);
510  BuildMI(MBB, I, DL, SMovB32, RsrcLo)
511  .addReg(GitPtrLo)
512  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
513 
514  // We now have the GIT ptr - now get the scratch descriptor from the entry
515  // at offset 0 (or offset 16 for a compute shader).
517  const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
518  auto MMO = MF.getMachineMemOperand(PtrInfo,
522  16, Align(4));
523  unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
524  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
525  unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
526  BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
527  .addReg(Rsrc01)
528  .addImm(EncodedOffset) // offset
529  .addImm(0) // glc
530  .addImm(0) // dlc
531  .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
532  .addMemOperand(MMO);
533  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
534  assert(!ST.isAmdHsaOrMesa(Fn));
535  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
536 
537  Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
538  Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
539 
540  // Use relocations to get the pointer, and setup the other bits manually.
541  uint64_t Rsrc23 = TII->getScratchRsrcWords23();
542 
543  if (MFI->hasImplicitBufferPtr()) {
544  Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
545 
547  const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
548 
549  BuildMI(MBB, I, DL, Mov64, Rsrc01)
551  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
552  } else {
553  const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
554 
556  auto MMO = MF.getMachineMemOperand(
557  PtrInfo,
560  8, Align(4));
561  BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
563  .addImm(0) // offset
564  .addImm(0) // glc
565  .addImm(0) // dlc
566  .addMemOperand(MMO)
567  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
568 
571  }
572  } else {
573  Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
574  Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
575 
576  BuildMI(MBB, I, DL, SMovB32, Rsrc0)
577  .addExternalSymbol("SCRATCH_RSRC_DWORD0")
578  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
579 
580  BuildMI(MBB, I, DL, SMovB32, Rsrc1)
581  .addExternalSymbol("SCRATCH_RSRC_DWORD1")
582  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
583 
584  }
585 
586  BuildMI(MBB, I, DL, SMovB32, Rsrc2)
587  .addImm(Rsrc23 & 0xffffffff)
588  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
589 
590  BuildMI(MBB, I, DL, SMovB32, Rsrc3)
591  .addImm(Rsrc23 >> 32)
592  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
593  } else if (ST.isAmdHsaOrMesa(Fn)) {
594  assert(PreloadedScratchRsrcReg);
595 
596  if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
597  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
598  .addReg(PreloadedScratchRsrcReg, RegState::Kill);
599  }
600  }
601 
602  // Add the scratch wave offset into the scratch RSRC.
603  //
604  // We only want to update the first 48 bits, which is the base address
605  // pointer, without touching the adjacent 16 bits of flags. We know this add
606  // cannot carry-out from bit 47, otherwise the scratch allocation would be
607  // impossible to fit in the 48-bit global address space.
608  //
609  // TODO: Evaluate if it is better to just construct an SRD using the flat
610  // scratch init and some constants rather than update the one we are passed.
611  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
612  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
613 
614  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
615  // the kernel body via inreg arguments.
616  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
617  .addReg(ScratchRsrcSub0)
618  .addReg(ScratchWaveOffsetReg)
619  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
620  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
621  .addReg(ScratchRsrcSub1)
622  .addImm(0)
623  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
624 }
625 
// Switch over a TargetStackID::Value, returning whether the ID is handled
// (the llvm_unreachable message below grounds the parameter type; the
// function name is not visible in this scrape — presumably
// SIFrameLowering::isSupportedStackID, confirm against upstream).
// NOTE(review): the signature line and the individual case labels between
// `switch (ID)` and the return statements were dropped by the scrape —
// restore from upstream LLVM before compiling.
627  switch (ID) {
631  return true;
633  return false;
634  }
635  llvm_unreachable("Invalid TargetStackID::Value");
636 }
637 
638 // Activate all lanes, returns saved exec.
// Saves the current EXEC mask into a scratch SGPR (pair) found via
// findScratchNonCalleeSaveRegister, then sets all lanes active with
// S_OR_SAVEEXEC (-1). LiveRegs is lazily initialized from block live-ins
// (prologue) or live-outs stepped back over *MBBI (epilogue).
//
// NOTE(review): the scrape drops parts of the signature (the LiveRegs and
// MBBI parameters) and the MRI/FuncInfo declarations — restore from
// upstream LLVM before compiling.
640  MachineFunction &MF,
641  MachineBasicBlock &MBB,
643  bool IsProlog) {
644  Register ScratchExecCopy;
646  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
647  const SIInstrInfo *TII = ST.getInstrInfo();
648  const SIRegisterInfo &TRI = TII->getRegisterInfo();
650  DebugLoc DL;
651 
652  if (LiveRegs.empty()) {
653  if (IsProlog) {
654  LiveRegs.init(TRI);
655  LiveRegs.addLiveIns(MBB);
656  if (FuncInfo->SGPRForFPSaveRestoreCopy)
657  LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
658 
659  if (FuncInfo->SGPRForBPSaveRestoreCopy)
660  LiveRegs.removeReg(FuncInfo->SGPRForBPSaveRestoreCopy);
661  } else {
662  // In epilog.
663  LiveRegs.init(*ST.getRegisterInfo());
664  LiveRegs.addLiveOuts(MBB);
665  LiveRegs.stepBackward(*MBBI);
666  }
667  }
668 
669  ScratchExecCopy = findScratchNonCalleeSaveRegister(
670  MRI, LiveRegs, *TRI.getWaveMaskRegClass());
671 
672  if (!IsProlog)
673  LiveRegs.removeReg(ScratchExecCopy);
674 
675  const unsigned OrSaveExec =
676  ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
677  BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);
678 
679  return ScratchExecCopy;
680 }
681 
// Prologue emission for non-entry functions: copies or spills FP/BP, spills
// the SGPR-spill VGPRs (with all lanes forced on via buildScratchExecCopy),
// realigns the stack when needed, and advances the stack pointer. Entry
// functions are delegated to emitEntryFunctionPrologue at the top.
//
// NOTE(review): the scrape drops the signature's opening line (presumably
// `void SIFrameLowering::emitPrologue(MachineFunction &MF,` — confirm
// against upstream) plus several declarations and operand lines (FuncInfo,
// MRI, MBBI, the FrameSetup flags on some copies, the Reg loop header of
// the SGPR-spill-VGPR loop, and the Spill/TmpVGPR declarations) — restore
// from upstream LLVM before compiling.
683  MachineBasicBlock &MBB) const {
685  if (FuncInfo->isEntryFunction()) {
686  emitEntryFunctionPrologue(MF, MBB);
687  return;
688  }
689 
690  const MachineFrameInfo &MFI = MF.getFrameInfo();
692  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
693  const SIInstrInfo *TII = ST.getInstrInfo();
694  const SIRegisterInfo &TRI = TII->getRegisterInfo();
695 
696  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
697  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
698  Register BasePtrReg =
699  TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
700  LivePhysRegs LiveRegs;
701 
703  DebugLoc DL;
704 
705  bool HasFP = false;
706  bool HasBP = false;
707  uint32_t NumBytes = MFI.getStackSize();
708  uint32_t RoundedSize = NumBytes;
709  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
710  // turn on all lanes before doing the spill to memory.
711  Register ScratchExecCopy;
712 
713  bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
714  bool SpillFPToMemory = false;
715  // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
716  // Otherwise we are spilling the FP to memory.
717  if (HasFPSaveIndex) {
718  SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) !=
720  }
721 
722  bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue();
723  bool SpillBPToMemory = false;
724  // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
725  // Otherwise we are spilling the BP to memory.
726  if (HasBPSaveIndex) {
727  SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) !=
729  }
730 
731  // Emit the copy if we need an FP, and are using a free SGPR to save it.
732  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
733  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
734  .addReg(FramePtrReg)
736  }
737 
738  // Emit the copy if we need a BP, and are using a free SGPR to save it.
739  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
740  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
741  FuncInfo->SGPRForBPSaveRestoreCopy)
742  .addReg(BasePtrReg)
744  }
745 
746  // If a copy has been emitted for FP and/or BP, Make the SGPRs
747  // used in the copy instructions live throughout the function.
748  SmallVector<MCPhysReg, 2> TempSGPRs;
749  if (FuncInfo->SGPRForFPSaveRestoreCopy)
750  TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
751 
752  if (FuncInfo->SGPRForBPSaveRestoreCopy)
753  TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
754 
755  if (!TempSGPRs.empty()) {
756  for (MachineBasicBlock &MBB : MF) {
757  for (MCPhysReg Reg : TempSGPRs)
758  MBB.addLiveIn(Reg);
759 
760  MBB.sortUniqueLiveIns();
761  }
762  }
763 
765  : FuncInfo->getSGPRSpillVGPRs()) {
766  if (!Reg.FI.hasValue())
767  continue;
768 
769  if (!ScratchExecCopy)
770  ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
771 
772  buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
773  FuncInfo->getScratchRSrcReg(),
774  StackPtrReg,
775  Reg.FI.getValue());
776  }
777 
778  if (HasFPSaveIndex && SpillFPToMemory) {
780 
781  if (!ScratchExecCopy)
782  ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
783 
785  MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
786 
787  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
788  .addReg(FramePtrReg);
789 
790  buildPrologSpill(LiveRegs, MBB, MBBI, TII, TmpVGPR,
791  FuncInfo->getScratchRSrcReg(), StackPtrReg,
792  FuncInfo->FramePointerSaveIndex.getValue());
793  }
794 
795  if (HasBPSaveIndex && SpillBPToMemory) {
797 
798  if (!ScratchExecCopy)
799  ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
800 
802  MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
803 
804  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
805  .addReg(BasePtrReg);
806 
807  buildPrologSpill(LiveRegs, MBB, MBBI, TII, TmpVGPR,
808  FuncInfo->getScratchRSrcReg(), StackPtrReg,
809  *FuncInfo->BasePointerSaveIndex);
810  }
811 
812  if (ScratchExecCopy) {
813  // FIXME: Split block and make terminator.
814  unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
815  MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
816  BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
817  .addReg(ScratchExecCopy, RegState::Kill);
818  LiveRegs.addReg(ScratchExecCopy);
819  }
820 
821  // In this case, spill the FP to a reserved VGPR.
822  if (HasFPSaveIndex && !SpillFPToMemory) {
823  const int FI = FuncInfo->FramePointerSaveIndex.getValue();
824  assert(!MFI.isDeadObjectIndex(FI));
825 
828  FuncInfo->getSGPRToVGPRSpills(FI);
829  assert(Spill.size() == 1);
830 
831  // Save FP before setting it up.
832  // FIXME: This should respect spillSGPRToVGPR;
833  BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
834  Spill[0].VGPR)
835  .addReg(FramePtrReg)
836  .addImm(Spill[0].Lane)
837  .addReg(Spill[0].VGPR, RegState::Undef);
838  }
839 
840  // In this case, spill the BP to a reserved VGPR.
841  if (HasBPSaveIndex && !SpillBPToMemory) {
842  const int BasePtrFI = *FuncInfo->BasePointerSaveIndex;
843  assert(!MFI.isDeadObjectIndex(BasePtrFI));
844 
845  assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
847  FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
848  assert(Spill.size() == 1);
849 
850  // Save BP before setting it up.
851  // FIXME: This should respect spillSGPRToVGPR;
852  BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
853  Spill[0].VGPR)
854  .addReg(BasePtrReg)
855  .addImm(Spill[0].Lane)
856  .addReg(Spill[0].VGPR, RegState::Undef);
857  }
858 
859  if (TRI.needsStackRealignment(MF)) {
860  HasFP = true;
861  const unsigned Alignment = MFI.getMaxAlign().value();
862 
863  RoundedSize += Alignment;
864  if (LiveRegs.empty()) {
865  LiveRegs.init(TRI);
866  LiveRegs.addLiveIns(MBB);
867  LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
868  LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
869  }
870 
872  MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
873  assert(ScratchSPReg && ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy &&
874  ScratchSPReg != FuncInfo->SGPRForBPSaveRestoreCopy);
875 
876  // s_add_u32 tmp_reg, s32, NumBytes
877  // s_and_b32 s32, tmp_reg, 0b111...0000
878  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
879  .addReg(StackPtrReg)
880  .addImm((Alignment - 1) * ST.getWavefrontSize())
881  .setMIFlag(MachineInstr::FrameSetup);
882  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
883  .addReg(ScratchSPReg, RegState::Kill)
884  .addImm(-Alignment * ST.getWavefrontSize())
885  .setMIFlag(MachineInstr::FrameSetup);
886  FuncInfo->setIsStackRealigned(true);
887  } else if ((HasFP = hasFP(MF))) {
888  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
889  .addReg(StackPtrReg)
891  }
892 
893  // If we need a base pointer, set it up here. It's whatever the value of
894  // the stack pointer is at this point. Any variable size objects will be
895  // allocated after this, so we can still use the base pointer to reference
896  // the incoming arguments.
897  if ((HasBP = TRI.hasBasePointer(MF))) {
898  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
899  .addReg(StackPtrReg)
901  }
902 
903  if (HasFP && RoundedSize != 0) {
904  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
905  .addReg(StackPtrReg)
906  .addImm(RoundedSize * ST.getWavefrontSize())
907  .setMIFlag(MachineInstr::FrameSetup);
908  }
909 
910  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
911  FuncInfo->FramePointerSaveIndex)) &&
912  "Needed to save FP but didn't save it anywhere");
913 
914  assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
915  !FuncInfo->FramePointerSaveIndex)) &&
916  "Saved FP but didn't need it");
917 
918  assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
919  FuncInfo->BasePointerSaveIndex)) &&
920  "Needed to save BP but didn't save it anywhere");
921 
922  assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
923  !FuncInfo->BasePointerSaveIndex)) &&
924  "Saved BP but didn't need it");
925 }
926 
// SIFrameLowering::emitEpilogue — the signature head was lost in the doxygen
// rendering, and gaps in the embedded line numbers below (934->936, 955->956,
// 975->976, 992->993, 1000->1002, 1036->1038, ...) mark other dropped source
// lines; consult the upstream file before editing.
//
// Emits the epilogue for non-entry functions:
//  * undoes the prologue's stack-pointer increment (S_SUB_U32, FrameDestroy),
//  * restores FP/BP either from a scratch SGPR copy, or by reloading the
//    saved value from memory via a temp VGPR (V_READFIRSTLANE_B32), or by
//    reading a saved VGPR lane (V_READLANE_B32),
//  * reloads the VGPRs that carried SGPR spills, and
//  * restores EXEC from the copy taken by buildScratchExecCopy, if any.
928  MachineBasicBlock &MBB) const {
929  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
// Entry functions have no callee frame to tear down.
930  if (FuncInfo->isEntryFunction())
931  return;
932 
933  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
934  const SIInstrInfo *TII = ST.getInstrInfo();
936  const SIRegisterInfo &TRI = TII->getRegisterInfo();
938  LivePhysRegs LiveRegs;
939  DebugLoc DL;
940 
941  const MachineFrameInfo &MFI = MF.getFrameInfo();
942  uint32_t NumBytes = MFI.getStackSize();
// When the stack was realigned the prologue padded the frame by the maximum
// alignment; mirror that here so the subtraction matches the addition.
943  uint32_t RoundedSize = FuncInfo->isStackRealigned()
944  ? NumBytes + MFI.getMaxAlign().value()
945  : NumBytes;
946  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
947  const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
948  const Register BasePtrReg =
949  TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
950 
// Decide whether FP/BP were saved to an actual stack slot (memory) rather
// than a VGPR lane. The dropped lines 955/962 presumably compared the stack
// ID against TargetStackID::SGPRSpill (cf. the assert further down) — TODO
// confirm against upstream.
951  bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
952  bool SpillFPToMemory = false;
953  if (HasFPSaveIndex) {
954  SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) !=
956  }
957 
958  bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue();
959  bool SpillBPToMemory = false;
960  if (HasBPSaveIndex) {
961  SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) !=
963  }
964 
// Pop the frame: offsets are in units scaled by the wavefront size.
965  if (RoundedSize != 0 && hasFP(MF)) {
966  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
967  .addReg(StackPtrReg)
968  .addImm(RoundedSize * ST.getWavefrontSize())
969  .setMIFlag(MachineInstr::FrameDestroy);
970  }
971 
// FP/BP were kept live in scratch SGPRs across the function; copy them back.
// (A trailing line of each BuildMI chain was dropped — likely a
// .setMIFlag(MachineInstr::FrameDestroy) — TODO confirm.)
972  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
973  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
974  .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
976  }
977 
978  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
979  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
980  .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
982  }
983 
// Restore FP from its save slot. The memory path reloads through a scratch
// VGPR and broadcasts lane 0 back into the SGPR with V_READFIRSTLANE_B32;
// the declaration of TempVGPR (line 992) was dropped by the rendering.
984  Register ScratchExecCopy;
985  if (HasFPSaveIndex) {
986  const int FI = FuncInfo->FramePointerSaveIndex.getValue();
987  assert(!MFI.isDeadObjectIndex(FI));
988  if (SpillFPToMemory) {
989  if (!ScratchExecCopy)
990  ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
991 
993  MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
994  buildEpilogReload(LiveRegs, MBB, MBBI, TII, TempVGPR,
995  FuncInfo->getScratchRSrcReg(), StackPtrReg, FI);
996  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
997  .addReg(TempVGPR, RegState::Kill);
998  } else {
999  // Reload from VGPR spill.
1002  FuncInfo->getSGPRToVGPRSpills(FI);
1003  assert(Spill.size() == 1);
1004  BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
1005  FramePtrReg)
1006  .addReg(Spill[0].VGPR)
1007  .addImm(Spill[0].Lane);
1008  }
1009  }
1010 
// Same two-path restore for the base pointer.
1011  if (HasBPSaveIndex) {
1012  const int BasePtrFI = *FuncInfo->BasePointerSaveIndex;
1013  assert(!MFI.isDeadObjectIndex(BasePtrFI));
1014  if (SpillBPToMemory) {
1015  if (!ScratchExecCopy)
1016  ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
1017 
1019  MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
1020  buildEpilogReload(LiveRegs, MBB, MBBI, TII, TempVGPR,
1021  FuncInfo->getScratchRSrcReg(), StackPtrReg, BasePtrFI);
1022  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
1023  .addReg(TempVGPR, RegState::Kill);
1024  } else {
1025  // Reload from VGPR spill.
1026  assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
1028  FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
1029  assert(Spill.size() == 1);
1030  BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
1031  BasePtrReg)
1032  .addReg(Spill[0].VGPR)
1033  .addImm(Spill[0].Lane);
1034  }
1035  }
1036 
// Reload the VGPRs used to hold SGPR spills. The for-loop header (line 1037,
// iterating FuncInfo->getSGPRSpillVGPRs()) was dropped by the rendering.
1038  FuncInfo->getSGPRSpillVGPRs()) {
1039  if (!Reg.FI.hasValue())
1040  continue;
1041 
1042  if (!ScratchExecCopy)
1043  ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
1044 
1045  buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
1046  FuncInfo->getScratchRSrcReg(), StackPtrReg,
1047  Reg.FI.getValue());
1048  }
1049 
// If EXEC was saved so the reloads above could run with all lanes enabled,
// restore it from the scratch copy.
1050  if (ScratchExecCopy) {
1051  // FIXME: Split block and make terminator.
1052  unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1053  MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
1054  BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
1055  .addReg(ScratchExecCopy, RegState::Kill);
1056  }
1057 }
1058 
1059 // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
1060 // memory. They should have been removed by now.
1061 static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
1062  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
1063  I != E; ++I) {
1064  if (!MFI.isDeadObjectIndex(I))
1065  return false;
1066  }
1067 
1068  return true;
1069 }
1070 
1071 #ifndef NDEBUG
// Debug-only assertion helper. The signature head (line 1072) was dropped by
// the doxygen rendering; per the symbol index it is:
//   static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
//                                    Optional<int> FramePointerSaveIndex,
//                                    Optional<int> BasePointerSaveIndex)
// Returns false if a live frame object other than the FP/BP save slots is
// found. Line 1078 is also missing from the condition below — presumably a
// TargetStackID::SGPRSpill stack-ID check — TODO confirm against upstream.
1073  Optional<int> FramePointerSaveIndex,
1074  Optional<int> BasePointerSaveIndex) {
1075  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
1076  I != E; ++I) {
1077  if (!MFI.isDeadObjectIndex(I) &&
1079  ((FramePointerSaveIndex && I != FramePointerSaveIndex) ||
1080  (BasePointerSaveIndex && I != BasePointerSaveIndex))) {
1081  return false;
1082  }
1083  }
1084 
1085  return true;
1086 }
1087 #endif
1088 
// SIFrameLowering::getFrameIndexReference — the signature head (line 1089)
// was dropped by the rendering; per the symbol index it takes
// (const MachineFunction &MF, int FI, Register &FrameReg).
// Reports the frame register via the out-parameter and returns the object's
// assigned offset from MachineFrameInfo unmodified (no scaling here).
1090  Register &FrameReg) const {
1091  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1092 
1093  FrameReg = RI->getFrameRegister(MF);
1094  return MF.getFrameInfo().getObjectOffset(FI);
1095 }
1096 
// SIFrameLowering::processFunctionBeforeFrameFinalized — signature head
// (line 1097) dropped by the rendering. Drops dead frame indices and, when
// live stack objects remain, reserves an emergency scavenging slot so the
// register scavenger can spill. Lines 1104 (FuncInfo declaration) and 1107
// (the head of the assert ending at line 1108) were also dropped.
1098  MachineFunction &MF,
1099  RegScavenger *RS) const {
1100  MachineFrameInfo &MFI = MF.getFrameInfo();
1101 
1102  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1103  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1105 
1106  FuncInfo->removeDeadFrameIndices(MFI);
1108  "SGPR spill should have been removed in SILowerSGPRSpills");
1109 
1110  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
1111  // but currently hasNonSpillStackObjects is set only from source
1112  // allocas. Stack temps produced from legalization are not counted currently.
1113  if (!allStackObjectsAreDead(MFI)) {
1114  assert(RS && "RegScavenger required if spilling");
1115 
// Entry functions can address the slot at a fixed incoming-SP offset; other
// functions get an ordinary stack object sized/aligned for one SGPR.
1116  if (FuncInfo->isEntryFunction()) {
1117  int ScavengeFI = MFI.CreateFixedObject(
1118  TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
1119  RS->addScavengingFrameIndex(ScavengeFI);
1120  } else {
1121  int ScavengeFI = MFI.CreateStackObject(
1122  TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
1123  TRI->getSpillAlign(AMDGPU::SGPR_32RegClass), false);
1124  RS->addScavengingFrameIndex(ScavengeFI);
1125  }
1126  }
1127 }
1128 
1129 // Only report VGPRs to generic code.
// SIFrameLowering::determineCalleeSaves — signature head (line 1130) dropped
// by the rendering; per the declaration it takes (MF, SavedVGPRs, RS).
// Runs the default CSR determination, then strips everything but VGPRs,
// predicts whether an FP will be needed, excludes VGPRs already used for
// SGPR spilling, and picks a save strategy (spill lane or scratch SGPR) for
// the FP and, if present, the BP. Lines 1134 (MFI declaration), 1166 and
// 1173 (the getVGPRSpillLaneOrTempRegister call heads) were dropped.
1131  BitVector &SavedVGPRs,
1132  RegScavenger *RS) const {
1133  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
1135  if (MFI->isEntryFunction())
1136  return;
1137 
1138  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1139  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1140  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1141 
1142  // Ignore the SGPRs the default implementation found.
1143  SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask());
1144 
1145  // hasFP only knows about stack objects that already exist. We're now
1146  // determining the stack slots that will be created, so we have to predict
1147  // them. Stack objects force FP usage with calls.
1148  //
1149  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1150  // don't want to report it here.
1151  //
1152  // FIXME: Is this really hasReservedCallFrame?
1153  const bool WillHaveFP =
1154  FrameInfo.hasCalls() &&
1155  (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1156 
1157  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
1158  // so don't allow the default insertion to handle them.
1159  for (auto SSpill : MFI->getSGPRSpillVGPRs())
1160  SavedVGPRs.reset(SSpill.VGPR);
1161 
1162  LivePhysRegs LiveRegs;
1163  LiveRegs.init(*TRI);
1164 
1165  if (WillHaveFP || hasFP(MF)) {
1167  MFI->FramePointerSaveIndex, true);
1168  }
1169 
// When a BP is needed, keep the FP's scratch SGPR (if one was chosen above)
// out of consideration so the BP doesn't pick the same register.
1170  if (TRI->hasBasePointer(MF)) {
1171  if (MFI->SGPRForFPSaveRestoreCopy)
1172  LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);
1174  MFI->BasePointerSaveIndex, false);
1175  }
1176 }
1177 
// SIFrameLowering::determineCalleeSavesSGPR — signature head (line 1178)
// dropped by the rendering; per the declaration it takes (MF, SavedRegs, RS).
// SGPR counterpart of determineCalleeSaves: runs the default determination,
// then removes the stack pointer and every VGPR from the set so only SGPRs
// are reported here. Line 1182 (the MFI declaration used below) was dropped.
1179  BitVector &SavedRegs,
1180  RegScavenger *RS) const {
1181  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1183  if (MFI->isEntryFunction())
1184  return;
1185 
1186  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1187  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1188 
1189  // The SP is specifically managed and we don't want extra spills of it.
1190  SavedRegs.reset(MFI->getStackPtrOffsetReg());
1191  SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
1192 }
1193 
1196  std::vector<CalleeSavedInfo> &CSI) const {
1197  if (CSI.empty())
1198  return true; // Early exit if no callee saved registers are modified!
1199 
1200  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1201  if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
1202  !FuncInfo->SGPRForBPSaveRestoreCopy)
1203  return false;
1204 
1205  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1206  const SIRegisterInfo *RI = ST.getRegisterInfo();
1207  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1208  Register BasePtrReg = RI->getBaseRegister();
1209  unsigned NumModifiedRegs = 0;
1210 
1211  if (FuncInfo->SGPRForFPSaveRestoreCopy)
1212  NumModifiedRegs++;
1213  if (FuncInfo->SGPRForBPSaveRestoreCopy)
1214  NumModifiedRegs++;
1215 
1216  for (auto &CS : CSI) {
1217  if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
1218  CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
1219  if (--NumModifiedRegs)
1220  break;
1221  } else if (CS.getReg() == BasePtrReg &&
1222  FuncInfo->SGPRForBPSaveRestoreCopy) {
1223  CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
1224  if (--NumModifiedRegs)
1225  break;
1226  }
1227  }
1228 
1229  return false;
1230 }
1231 
// SIFrameLowering::eliminateCallFramePseudoInstr — signature head (line 1232)
// dropped by the rendering. Erases the call-frame setup/destroy pseudo at I;
// when the call frame is not reserved, it first bumps the stack pointer by
// the aligned amount (scaled by the wavefront size), adding on setup and
// subtracting on destroy. Line 1250 (the MFI declaration used for SPReg
// below) was also dropped.
1233  MachineFunction &MF,
1234  MachineBasicBlock &MBB,
1235  MachineBasicBlock::iterator I) const {
// Operand 0 of the pseudo is the frame size being set up / torn down.
1236  int64_t Amount = I->getOperand(0).getImm();
1237  if (Amount == 0)
1238  return MBB.erase(I);
1239 
1240  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1241  const SIInstrInfo *TII = ST.getInstrInfo();
1242  const DebugLoc &DL = I->getDebugLoc();
1243  unsigned Opc = I->getOpcode();
1244  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
1245  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
1246 
1247  if (!hasReservedCallFrame(MF)) {
1248  Amount = alignTo(Amount, getStackAlign());
1249  assert(isUInt<32>(Amount) && "exceeded stack address space size");
1251  Register SPReg = MFI->getStackPtrOffsetReg();
1252 
1253  unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
1254  BuildMI(MBB, I, DL, TII->get(Op), SPReg)
1255  .addReg(SPReg)
1256  .addImm(Amount * ST.getWavefrontSize());
1257  } else if (CalleePopAmount != 0) {
1258  llvm_unreachable("is this used?");
1259  }
1260 
1261  return MBB.erase(I);
1262 }
1263 
// SIFrameLowering::hasFP — the signature line (1264) was dropped by the
// rendering; per the declaration it is
//   bool SIFrameLowering::hasFP(const MachineFunction &MF) const
// Returns true when the function needs a dedicated frame pointer: a calling
// non-entry function with a non-empty frame, or any of the conditions in the
// final disjunction. Line 1282 — the last operand of that `||` chain — was
// also dropped; it presumably checks a target option such as
// DisableFramePointerElim — TODO confirm against upstream.
1265  const MachineFrameInfo &MFI = MF.getFrameInfo();
1266 
1267  // For entry functions we can use an immediate offset in most cases, so the
1268  // presence of calls doesn't imply we need a distinct frame pointer.
1269  if (MFI.hasCalls() &&
1270  !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
1271  // All offsets are unsigned, so need to be addressed in the same direction
1272  // as stack growth.
1273 
1274  // FIXME: This function is pretty broken, since it can be called before the
1275  // frame layout is determined or CSR spills are inserted.
1276  return MFI.getStackSize() != 0;
1277  }
1278 
1279  return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
1280  MFI.hasStackMap() || MFI.hasPatchPoint() ||
1281  MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
1283 }
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:208
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:412
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
Definition: BitVector.h:786
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Interface definition for SIRegisterInfo.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
AMDGPU specific subclass of TargetSubtarget.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:22
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed...
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Register getGITPtrLoReg(const MachineFunction &MF) const
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:187
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
unsigned Reg
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
const SIInstrInfo * getInstrInfo() const override
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
bool isSupportedStackID(TargetStackID::Value ID) const override
F(f)
void setIsStackRealigned(bool Realigned=true)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, const TargetRegisterClass &RC, bool Unused=false)
void removeDeadFrameIndices(MachineFrameInfo &MFI)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
Function & getFunction()
Return the LLVM function that this machine code represents.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
Address space for constant memory (VTX2).
Definition: AMDGPU.h:301
MachineBasicBlock & MBB
bool hasBasePointer(const MachineFunction &MF) const
bool empty() const
Returns true if the set is empty.
Definition: LivePhysRegs.h:76
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
A description of a memory reference used in the backend.
bool isMesaGfxShader(const Function &F) const
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
const HexagonInstrInfo * TII
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
ArrayRef< SpilledReg > getSGPRToVGPRSpills(int FrameIndex) const
The memory access is dereferenceable (i.e., doesn't trap).
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const
returns true if NumLanes slots are available in VGPRs already used for SGPR spilling.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
int getObjectIndexBegin() const
Return the minimum frame object index.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:19
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1172
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
bool isCompute(CallingConv::ID cc)
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every &#39;0&#39; bit in Mask.
Definition: BitVector.h:798
This file declares the machine register scavenger class.
unsigned const MachineRegisterInfo * MRI
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:156
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Register SGPRForFPSaveRestoreCopy
If this is set, an SGPR used for save/restore of the register used for the frame pointer.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
void addLiveIns(const MachineBasicBlock &MBB)
Adds all live-in registers of basic block MBB.
bool any() const
any - Returns true if any bit is set.
Definition: BitVector.h:181
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
Definition: LivePhysRegs.h:66
void stepBackward(const MachineInstr &MI)
Simulates liveness when stepping backwards over an instruction(bundle).
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:305
BitVector & reset()
Definition: BitVector.h:439
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Generation getGeneration() const
bool isPhysRegUsed(MCRegister PhysReg) const
Return true if the specified register is modified or read in this function.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
ArrayRef< SGPRSpillVGPRCSR > getSGPRSpillVGPRs() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const MachineBasicBlock & front() const
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
This class contains a discriminated union of information about pointers in memory operands...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required, we reserve argument space for call sites in the function immediately on entry to the current function.
The memory access writes data.
unsigned getWavefrontSize() const
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const SIInstrInfo *TII, Register SpillReg, Register ScratchRsrcReg, Register SPReg, int FI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:219
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
ArrayRef< MCPhysReg > getAllSGPR32(const MachineFunction &MF) const
Return all SGPR32 which satisfy the waves per execution unit requirement of the subtarget.
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:883
Register SGPRForBPSaveRestoreCopy
If this is set, an SGPR used for save/restore of the register used for the base pointer.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Promote Memory to Register
Definition: Mem2Reg.cpp:110
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
bool hasSGPRInitBug() const
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array...
Definition: ArrayRef.h:186
int getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
bool flatScratchIsPointer() const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Provides AMDGPU specific target descriptions.
bool hasValue() const
Definition: Optional.h:259
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:158
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:48
Register getBaseRegister() const
ArrayRef< MCPhysReg > getAllSGPR128(const MachineFunction &MF) const
Return all SGPR128 which satisfy the waves per execution unit requirement of the subtarget.
TargetOptions Options
#define I(x, y, z)
Definition: MD5.cpp:59
The memory access always returns the same value (or traps).
bool isAmdHsaOrMesa(const Function &F) const
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, LivePhysRegs &LiveRegs, Register &TempSGPR, Optional< int > &FrameIndex, bool IsFP)
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Register getImplicitBufferPtrUserSGPR() const
const TargetRegisterClass * getWaveMaskRegClass() const
uint8_t getStackID(int ObjectIdx) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Register getFrameRegister(const MachineFunction &MF) const override
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it...
static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const SIInstrInfo *TII, Register SpillReg, Register ScratchRsrcReg, Register SPReg, int FI)
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
void addReg(MCPhysReg Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:79
void removeReg(MCPhysReg Reg)
Removes a physical register, all its sub-registers, and all its super-registers from the set...
Definition: LivePhysRegs.h:89
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI, Optional< int > FramePointerSaveIndex, Optional< int > BasePointerSaveIndex)
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register...
MachineBasicBlock MachineBasicBlock::iterator MBBI
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool hasCalls() const
Return true if the current function has any function calls.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
const SIRegisterInfo * getRegisterInfo() const override