LLVM  10.0.0svn
SIFrameLowering.cpp
Go to the documentation of this file.
1 //===----------------------- SIFrameLowering.cpp --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 
9 #include "SIFrameLowering.h"
10 #include "AMDGPUSubtarget.h"
11 #include "SIInstrInfo.h"
12 #include "SIMachineFunctionInfo.h"
13 #include "SIRegisterInfo.h"
15 
21 
22 using namespace llvm;
23 
24 #define DEBUG_TYPE "frame-info"
25 
26 
// Returns an ArrayRef over the first getMaxNumSGPRs(MF) / 4 entries of the
// SGPR_128 register class (the divide-by-4 presumably reflects that each
// 128-bit tuple spans four 32-bit SGPRs — TODO confirm against full source).
// NOTE(review): doxygen-extracted text — the opening signature line (function
// name and first parameter, original line 27) is missing from this capture;
// the leading numbers on each line are original source line numbers, not code.
 28  const MachineFunction &MF) {
 29  return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
 30  ST.getMaxNumSGPRs(MF) / 4);
 31 }
32 
// Returns an ArrayRef over the first getMaxNumSGPRs(MF) entries of the
// 32-bit SGPR register class.
// NOTE(review): doxygen-extracted text — the opening signature line (original
// line 33) is missing from this capture; embedded numbers are original source
// line numbers, not code.
 34  const MachineFunction &MF) {
 35  return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
 36  ST.getMaxNumSGPRs(MF));
 37 }
38 
39 // Find a scratch register that we can use at the start of the prologue to
40 // re-align the stack pointer. We avoid using callee-save registers since they
41 // may appear to be free when this is called from canUseAsPrologue (during
42 // shrink wrapping), but then no longer be free when this is called from
43 // emitPrologue.
44 //
45 // FIXME: This is a bit conservative, since in the above case we could use one
46 // of the callee-save registers as a scratch temp to re-align the stack pointer,
47 // but we would then have to make sure that we were in fact saving at least one
48 // callee-save register in the prologue, which is additional complexity that
49 // doesn't seem worth the benefit.
// Finds a scratch register in class RC that is not callee-saved and not live.
// With Unused == true, additionally requires the register to be unused in the
// whole function (any use disqualifies it) and failure returns NoRegister;
// with Unused == false, failure is fatal.
// NOTE(review): doxygen-extracted text — the opening signature line (original
// line 50, presumably declaring the MachineRegisterInfo &MRI parameter and
// the function name) is missing from this capture; embedded numbers are
// original source line numbers, not code.
 51  LivePhysRegs &LiveRegs,
 52  const TargetRegisterClass &RC,
 53  bool Unused = false) {
 54  // Mark callee saved registers as used so we will not choose them.
 55  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
 56  for (unsigned i = 0; CSRegs[i]; ++i)
 57  LiveRegs.addReg(CSRegs[i]);
 58 
 59  if (Unused) {
 60  // We are looking for a register that can be used throughout the entire
 61  // function, so any use is unacceptable.
 62  for (unsigned Reg : RC) {
 63  if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
 64  return Reg;
 65  }
 66  } else {
 67  for (unsigned Reg : RC) {
 68  if (LiveRegs.available(MRI, Reg))
 69  return Reg;
 70  }
 71  }
 72 
 73  // If we require an unused register, this is used in contexts where failure is
 74  // an option and has an alternative plan. In other contexts, this must
 75  // succeed.
 76  if (!Unused)
 77  report_fatal_error("failed to find free scratch register");
 78 
 79  return AMDGPU::NoRegister;
 80 }
81 
// Convenience wrapper: builds an empty LivePhysRegs set and asks
// findScratchNonCalleeSaveRegister for an SGPR (SReg_32_XM0_XEXEC class) that
// is completely unused in the function (Unused = true, so the call above may
// return AMDGPU::NoRegister on failure).
// NOTE(review): doxygen-extracted text — the signature line (original line 82)
// and part of the return statement (original line 85) are missing from this
// capture, so the function's name and parameter list cannot be confirmed here;
// embedded numbers are original source line numbers, not code.
 83  LivePhysRegs LiveRegs;
 84  LiveRegs.init(*MRI.getTargetRegisterInfo());
 86  MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
 87 }
88 
89 // We need to specially emit stack operations here because a different frame
90 // register is used than in the rest of the function, as getFrameRegister would
91 // use.
// Emits a buffer store of SpillReg to the stack slot FI, addressed via
// ScratchRsrcReg + SPReg. Uses the 12-bit immediate-offset store form when the
// object offset fits; otherwise materializes the offset in a scratch VGPR and
// uses the register-offset (OFFEN) form.
// NOTE(review): doxygen-extracted text — the MachineBasicBlock::iterator
// parameter line (original line 93) and the MachineMemOperand construction
// (original lines 101-102, producing the MMO used below) are missing from
// this capture; embedded numbers are original source line numbers, not code.
 92 static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
 94  const SIInstrInfo *TII, unsigned SpillReg,
 95  unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
 96  MachineFunction *MF = MBB.getParent();
 97  MachineFrameInfo &MFI = MF->getFrameInfo();
 98 
 99  int64_t Offset = MFI.getObjectOffset(FI);
 100 
 103  MFI.getObjectAlignment(FI));
 104 
 105  if (isUInt<12>(Offset)) {
 106  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
 107  .addReg(SpillReg, RegState::Kill)
 108  .addReg(ScratchRsrcReg)
 109  .addReg(SPReg)
 110  .addImm(Offset)
 111  .addImm(0) // glc
 112  .addImm(0) // slc
 113  .addImm(0) // tfe
 114  .addImm(0) // dlc
 115  .addImm(0) // swz
 116  .addMemOperand(MMO);
 117  return;
 118  }
 119 
 // NOTE(review): line 120 (presumably the OffsetReg declaration and call to
 // findScratchNonCalleeSaveRegister) is missing from this capture.
 121  MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
 122 
 123  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
 124  .addImm(Offset);
 125 
 126  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
 127  .addReg(SpillReg, RegState::Kill)
 128  .addReg(OffsetReg, RegState::Kill)
 129  .addReg(ScratchRsrcReg)
 130  .addReg(SPReg)
 131  .addImm(0)
 132  .addImm(0) // glc
 133  .addImm(0) // slc
 134  .addImm(0) // tfe
 135  .addImm(0) // dlc
 136  .addImm(0) // swz
 137  .addMemOperand(MMO);
 138 }
139 
// Mirror of buildPrologSpill for the epilogue: emits a buffer load of stack
// slot FI into SpillReg, addressed via ScratchRsrcReg + SPReg. Uses the
// 12-bit immediate-offset load when the object offset fits; otherwise
// materializes the offset in a scratch VGPR and uses the OFFEN form.
// NOTE(review): doxygen-extracted text — the opening signature lines
// (original lines 140-141) and the MachineMemOperand construction (original
// lines 148-149, producing the MMO used below) are missing from this capture;
// embedded numbers are original source line numbers, not code.
 142  const SIInstrInfo *TII, unsigned SpillReg,
 143  unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
 144  MachineFunction *MF = MBB.getParent();
 145  MachineFrameInfo &MFI = MF->getFrameInfo();
 146  int64_t Offset = MFI.getObjectOffset(FI);
 147 
 150  MFI.getObjectAlignment(FI));
 151 
 152  if (isUInt<12>(Offset)) {
 153  BuildMI(MBB, I, DebugLoc(),
 154  TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
 155  .addReg(ScratchRsrcReg)
 156  .addReg(SPReg)
 157  .addImm(Offset)
 158  .addImm(0) // glc
 159  .addImm(0) // slc
 160  .addImm(0) // tfe
 161  .addImm(0) // dlc
 162  .addImm(0) // swz
 163  .addMemOperand(MMO);
 164  return;
 165  }
 166 
 // NOTE(review): line 167 (presumably the OffsetReg declaration and call to
 // findScratchNonCalleeSaveRegister) is missing from this capture.
 168  MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
 169 
 170  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
 171  .addImm(Offset);
 172 
 173  BuildMI(MBB, I, DebugLoc(),
 174  TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
 175  .addReg(OffsetReg, RegState::Kill)
 176  .addReg(ScratchRsrcReg)
 177  .addReg(SPReg)
 178  .addImm(0)
 179  .addImm(0) // glc
 180  .addImm(0) // slc
 181  .addImm(0) // tfe
 182  .addImm(0) // dlc
 183  .addImm(0) // swz
 184  .addMemOperand(MMO);
 185 }
186 
// Initializes the FLAT_SCR (flat scratch) register pair for an entry function
// from the preloaded FlatScratchInit input SGPR pair and the scratch wave
// offset. The pointer path (flatScratchIsPointer) does a 64-bit add; the
// legacy path copies the size and converts the byte offset to 256-byte units.
// NOTE(review): doxygen-extracted text — several original lines are missing
// from this capture (192: MRI declaration; 207: the insertion iterator I;
// 210/212: the getPreloadedReg call and MFI declaration; 223: an inner
// subtarget-generation branch; 233/237: the S_SETREG width/offset operand
// tails; 251: the branch separating the pointer path from the legacy path).
// Embedded numbers are original source line numbers, not code.
 187 void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
 188  MachineFunction &MF,
 189  MachineBasicBlock &MBB) const {
 190  const SIInstrInfo *TII = ST.getInstrInfo();
 191  const SIRegisterInfo* TRI = &TII->getRegisterInfo();
 193 
 194  // We don't need this if we only have spills since there is no user facing
 195  // scratch.
 196 
 197  // TODO: If we know we don't have flat instructions earlier, we can omit
 198  // this from the input registers.
 199  //
 200  // TODO: We only need to know if we access scratch space through a flat
 201  // pointer. Because we only detect if flat instructions are used at all,
 202  // this will be used more often than necessary on VI.
 203 
 204  // Debug location must be unknown since the first debug location is used to
 205  // determine the end of the prologue.
 206  DebugLoc DL;
 208 
 209  Register FlatScratchInitReg =
 211 
 213  MRI.addLiveIn(FlatScratchInitReg);
 214  MBB.addLiveIn(FlatScratchInitReg);
 215 
 216  Register FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
 217  Register FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
 218 
 219  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
 220 
 221  // Do a 64-bit pointer add.
 222  if (ST.flatScratchIsPointer()) {
 224  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
 225  .addReg(FlatScrInitLo)
 226  .addReg(ScratchWaveOffsetReg);
 227  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
 228  .addReg(FlatScrInitHi)
 229  .addImm(0);
 230  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
 231  addReg(FlatScrInitLo).
 232  addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
 234  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
 235  addReg(FlatScrInitHi).
 236  addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
 238  return;
 239  }
 240 
 241  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
 242  .addReg(FlatScrInitLo)
 243  .addReg(ScratchWaveOffsetReg);
 244  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
 245  .addReg(FlatScrInitHi)
 246  .addImm(0);
 247 
 248  return;
 249  }
 250 
 252 
 253  // Copy the size in bytes.
 254  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
 255  .addReg(FlatScrInitHi, RegState::Kill);
 256 
 257  // Add wave offset in bytes to private base offset.
 258  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
 259  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
 260  .addReg(FlatScrInitLo)
 261  .addReg(ScratchWaveOffsetReg);
 262 
 263  // Convert offset to 256-byte units.
 264  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
 265  .addReg(FlatScrInitLo, RegState::Kill)
 266  .addImm(8);
 267 }
268 
// Returns the scratch resource descriptor (SGPR_128) register to use, shifting
// the reserved descriptor down to the first unused, allocatable SGPR_128 tuple
// past the preloaded user SGPRs when possible. Returns NoRegister if the
// descriptor is unset or unused; keeps the current register on subtargets with
// the SGPR init bug or when it is not the default reserved register.
// NOTE(review): doxygen-extracted text — original lines 273 and 275 (the
// SIMachineFunctionInfo *MFI parameter and the MRI declaration) are missing
// from this capture; embedded numbers are original source line numbers.
 269 unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
 270  const GCNSubtarget &ST,
 271  const SIInstrInfo *TII,
 272  const SIRegisterInfo *TRI,
 274  MachineFunction &MF) const {
 276 
 277  // We need to insert initialization of the scratch resource descriptor.
 278  unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
 279  if (ScratchRsrcReg == AMDGPU::NoRegister ||
 280  !MRI.isPhysRegUsed(ScratchRsrcReg))
 281  return AMDGPU::NoRegister;
 282 
 283  if (ST.hasSGPRInitBug() ||
 284  ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
 285  return ScratchRsrcReg;
 286 
 287  // We reserved the last registers for this. Shift it down to the end of those
 288  // which were actually used.
 289  //
 290  // FIXME: It might be safer to use a pseudoregister before replacement.
 291 
 292  // FIXME: We should be able to eliminate unused input registers. We only
 293  // cannot do this for the resources required for scratch access. For now we
 294  // skip over user SGPRs and may leave unused holes.
 295 
 296  // We find the resource first because it has an alignment requirement.
 297 
 298  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
 299  ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
 300  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
 301 
 302  // Skip the last N reserved elements because they should have already been
 303  // reserved for VCC etc.
 304  for (MCPhysReg Reg : AllSGPR128s) {
 305  // Pick the first unallocated one. Make sure we don't clobber the other
 306  // reserved input we needed.
 307  if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
 308  MRI.replaceRegWith(ScratchRsrcReg, Reg);
 309  MFI->setScratchRSrcReg(Reg);
 310  return Reg;
 311  }
 312  }
 313 
 314  return ScratchRsrcReg;
 315 }
316 
 317 // Shift down registers reserved for the scratch wave offset.
 // Returns the (possibly re-assigned) scratch wave offset register and a flag
 // indicating whether the frame-offset register was adjusted as a result.
 // Entry functions only (asserted below).
 // NOTE(review): doxygen-extracted text — original lines 322 (MRI
 // declaration), 377 (presumably the setStackPtrOffsetReg call inside the
 // assert branch) and 380 (presumably a comment or setScratchWaveOffsetReg
 // call before setFrameOffsetReg) are missing from this capture; embedded
 // numbers are original source line numbers, not code.
 318 std::pair<unsigned, bool>
 319 SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
 320  const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
 321  SIMachineFunctionInfo *MFI, MachineFunction &MF) const {
 323  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
 324 
 325  assert(MFI->isEntryFunction());
 326 
 327  // No replacement necessary.
 328  if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
 329  (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) {
 330  return std::make_pair(AMDGPU::NoRegister, false);
 331  }
 332 
 333  if (ST.hasSGPRInitBug())
 334  return std::make_pair(ScratchWaveOffsetReg, false);
 335 
 336  unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
 337 
 338  ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
 339  if (NumPreloaded > AllSGPRs.size())
 340  return std::make_pair(ScratchWaveOffsetReg, false);
 341 
 342  AllSGPRs = AllSGPRs.slice(NumPreloaded);
 343 
 344  // We need to drop register from the end of the list that we cannot use
 345  // for the scratch wave offset.
 346  // + 2 s102 and s103 do not exist on VI.
 347  // + 2 for vcc
 348  // + 2 for xnack_mask
 349  // + 2 for flat_scratch
 350  // + 4 for registers reserved for scratch resource register
 351  // + 1 for register reserved for scratch wave offset. (By exluding this
 352  // register from the list to consider, it means that when this
 353  // register is being used for the scratch wave offset and there
 354  // are no other free SGPRs, then the value will stay in this register.
 355  // + 1 if stack pointer is used.
 356  // ----
 357  // 13 (+1)
 358  unsigned ReservedRegCount = 13;
 359 
 360  if (AllSGPRs.size() < ReservedRegCount)
 361  return std::make_pair(ScratchWaveOffsetReg, false);
 362 
 363  bool HandledScratchWaveOffsetReg =
 364  ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
 365  bool FPAdjusted = false;
 366 
 367  for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
 368  // Pick the first unallocated SGPR. Be careful not to pick an alias of the
 369  // scratch descriptor, since we haven’t added its uses yet.
 370  if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
 371  if (!HandledScratchWaveOffsetReg) {
 372  HandledScratchWaveOffsetReg = true;
 373 
 374  MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
 375  if (MFI->getScratchWaveOffsetReg() == MFI->getStackPtrOffsetReg()) {
 376  assert(!hasFP(MF));
 378  }
 379 
 381  MFI->setFrameOffsetReg(Reg);
 382  ScratchWaveOffsetReg = Reg;
 383  FPAdjusted = true;
 384  break;
 385  }
 386  }
 387  }
 388 
 389  return std::make_pair(ScratchWaveOffsetReg, FPAdjusted);
 390 }
391 
// Entry-function (kernel) prologue: shifts down the reserved scratch resource
// descriptor and scratch wave offset registers, re-adds their preloaded input
// registers as live-ins, copies them to their final destinations in a
// dangling-safe order, emits the scratch rsrc setup, and finally initializes
// SP from the scratch wave offset (plus the scaled stack size) when a frame
// pointer is needed.
// NOTE(review): doxygen-extracted text — original lines 392 (the function
// signature), 396 (MFI declaration), 407 (MRI declaration), 432 (the
// PRIVATE_SEGMENT_WAVE_BYTE_OFFSET argument to getPreloadedReg), 437 (the
// PRIVATE_SEGMENT_BUFFER argument) and 473 (the insertion iterator I) are
// missing from this capture; embedded numbers are original source line
// numbers, not code.
 393  MachineBasicBlock &MBB) const {
 394  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
 395 
 397 
 398  // If we only have SGPR spills, we won't actually be using scratch memory
 399  // since these spill to VGPRs.
 400  //
 401  // FIXME: We should be cleaning up these unused SGPR spill frame indices
 402  // somewhere.
 403 
 404  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 405  const SIInstrInfo *TII = ST.getInstrInfo();
 406  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
 408  const Function &F = MF.getFunction();
 409 
 410  // We need to do the replacement of the private segment buffer and wave offset
 411  // register even if there are no stack objects. There could be stores to undef
 412  // or a constant without an associated object.
 413 
 414  // FIXME: We still have implicit uses on SGPR spill instructions in case they
 415  // need to spill to vector memory. It's likely that will not happen, but at
 416  // this point it appears we need the setup. This part of the prolog should be
 417  // emitted after frame indices are eliminated.
 418 
 419  if (MFI->hasFlatScratchInit())
 420  emitFlatScratchInit(ST, MF, MBB);
 421 
 422  unsigned ScratchRsrcReg
 423  = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
 424 
 425  unsigned ScratchWaveOffsetReg;
 426  bool FPAdjusted;
 427  std::tie(ScratchWaveOffsetReg, FPAdjusted) =
 428  getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
 429 
 430  // We need to insert initialization of the scratch resource descriptor.
 431  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
 433 
 434  unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
 435  if (ST.isAmdHsaOrMesa(F)) {
 436  PreloadedPrivateBufferReg = MFI->getPreloadedReg(
 438  }
 439 
 440  bool OffsetRegUsed = ScratchWaveOffsetReg != AMDGPU::NoRegister &&
 441  MRI.isPhysRegUsed(ScratchWaveOffsetReg);
 442  bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
 443  MRI.isPhysRegUsed(ScratchRsrcReg);
 444 
 445  // FIXME: Hack to not crash in situations which emitted an error.
 446  if (PreloadedScratchWaveOffsetReg == AMDGPU::NoRegister)
 447  return;
 448 
 449  // We added live-ins during argument lowering, but since they were not used
 450  // they were deleted. We're adding the uses now, so add them back.
 451  MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
 452  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
 453 
 454  if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
 455  assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F));
 456  MRI.addLiveIn(PreloadedPrivateBufferReg);
 457  MBB.addLiveIn(PreloadedPrivateBufferReg);
 458  }
 459 
 460  // Make the register selected live throughout the function.
 461  for (MachineBasicBlock &OtherBB : MF) {
 462  if (&OtherBB == &MBB)
 463  continue;
 464 
 465  if (OffsetRegUsed || FPAdjusted)
 466  OtherBB.addLiveIn(ScratchWaveOffsetReg);
 467 
 468  if (ResourceRegUsed)
 469  OtherBB.addLiveIn(ScratchRsrcReg);
 470  }
 471 
 472  DebugLoc DL;
 474 
 475  // If we reserved the original input registers, we don't need to copy to the
 476  // reserved registers.
 477 
 478  bool CopyBuffer = ResourceRegUsed &&
 479  PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
 480  ST.isAmdHsaOrMesa(F) &&
 481  ScratchRsrcReg != PreloadedPrivateBufferReg;
 482 
 483  // This needs to be careful of the copying order to avoid overwriting one of
 484  // the input registers before it's been copied to it's final
 485  // destination. Usually the offset should be copied first.
 486  bool CopyBufferFirst = TRI->isSubRegisterEq(PreloadedPrivateBufferReg,
 487  ScratchWaveOffsetReg);
 488  if (CopyBuffer && CopyBufferFirst) {
 489  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
 490  .addReg(PreloadedPrivateBufferReg, RegState::Kill);
 491  }
 492 
 493  unsigned SPReg = MFI->getStackPtrOffsetReg();
 494  assert(SPReg != AMDGPU::SP_REG);
 495 
 496  // FIXME: Remove the isPhysRegUsed checks
 497  const bool HasFP = hasFP(MF);
 498 
 499  if (HasFP || OffsetRegUsed) {
 500  assert(ScratchWaveOffsetReg);
 501  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
 502  .addReg(PreloadedScratchWaveOffsetReg, HasFP ? RegState::Kill : 0);
 503  }
 504 
 505  if (CopyBuffer && !CopyBufferFirst) {
 506  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
 507  .addReg(PreloadedPrivateBufferReg, RegState::Kill);
 508  }
 509 
 510  if (ResourceRegUsed) {
 511  emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I,
 512  PreloadedPrivateBufferReg, ScratchRsrcReg);
 513  }
 514 
 515  if (HasFP) {
 516  DebugLoc DL;
 517  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
 518  int64_t StackSize = FrameInfo.getStackSize();
 519 
 520  // On kernel entry, the private scratch wave offset is the SP value.
 521  if (StackSize == 0) {
 522  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SPReg)
 523  .addReg(MFI->getScratchWaveOffsetReg());
 524  } else {
 525  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
 526  .addReg(MFI->getScratchWaveOffsetReg())
 527  .addImm(StackSize * ST.getWavefrontSize());
 528  }
 529  }
 530 }
531 
 532 // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
 // On AMDPAL: forms a pointer to the Global Information Table (GIT), loads the
 // scratch descriptor from it (offset 16 for compute shaders, else 0). On Mesa
 // GFX shaders / when no buffer is preloaded: builds the descriptor manually,
 // either from the implicit buffer pointer or via SCRATCH_RSRC_DWORD
 // relocations plus constant words 2-3 from getScratchRsrcWords23().
 // NOTE(review): doxygen-extracted text — several original lines are missing
 // from this capture, including 534 (part of the signature), 562-564 (the
 // merged-shader calling-convention case labels), 582-583 and 626-628 (the
 // PointerType::get(...) arguments), 587-589 and 631-633 (the
 // MachineMemOperand flag arguments), 617/621 (implicit-buffer-pointer setup
 // and its source operand), 636 (the Rsrc01 base-register operand) and 643-644
 // (end of the else branch). Embedded numbers are original source line
 // numbers, not code.
 533 void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
 535  MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg,
 536  unsigned ScratchRsrcReg) const {
 537 
 538  const SIInstrInfo *TII = ST.getInstrInfo();
 539  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
 540  const Function &Fn = MF.getFunction();
 541  DebugLoc DL;
 542 
 543  if (ST.isAmdPalOS()) {
 544  // The pointer to the GIT is formed from the offset passed in and either
 545  // the amdgpu-git-ptr-high function attribute or the top part of the PC
 546  Register RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
 547  Register RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
 548  Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
 549 
 550  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
 551 
 552  if (MFI->getGITPtrHigh() != 0xffffffff) {
 553  BuildMI(MBB, I, DL, SMovB32, RsrcHi)
 554  .addImm(MFI->getGITPtrHigh())
 555  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 556  } else {
 557  const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
 558  BuildMI(MBB, I, DL, GetPC64, Rsrc01);
 559  }
 560  auto GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
 561  if (ST.hasMergedShaders()) {
 562  switch (MF.getFunction().getCallingConv()) {
 565  // Low GIT address is passed in s8 rather than s0 for an LS+HS or
 566  // ES+GS merged shader on gfx9+.
 567  GitPtrLo = AMDGPU::SGPR8;
 568  break;
 569  default:
 570  break;
 571  }
 572  }
 573  MF.getRegInfo().addLiveIn(GitPtrLo);
 574  MBB.addLiveIn(GitPtrLo);
 575  BuildMI(MBB, I, DL, SMovB32, RsrcLo)
 576  .addReg(GitPtrLo)
 577  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 578 
 579  // We now have the GIT ptr - now get the scratch descriptor from the entry
 580  // at offset 0 (or offset 16 for a compute shader).
 581  PointerType *PtrTy =
 584  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
 585  const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
 586  auto MMO = MF.getMachineMemOperand(PtrInfo,
 590  16, 4);
 591  unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
 592  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
 593  unsigned EncodedOffset = AMDGPU::getSMRDEncodedOffset(Subtarget, Offset);
 594  BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
 595  .addReg(Rsrc01)
 596  .addImm(EncodedOffset) // offset
 597  .addImm(0) // glc
 598  .addImm(0) // dlc
 599  .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
 600  .addMemOperand(MMO);
 601  return;
 602  }
 603  if (ST.isMesaGfxShader(Fn)
 604  || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) {
 605  assert(!ST.isAmdHsaOrMesa(Fn));
 606  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
 607 
 608  Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
 609  Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
 610 
 611  // Use relocations to get the pointer, and setup the other bits manually.
 612  uint64_t Rsrc23 = TII->getScratchRsrcWords23();
 613 
 614  if (MFI->hasImplicitBufferPtr()) {
 615  Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
 616 
 618  const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
 619 
 620  BuildMI(MBB, I, DL, Mov64, Rsrc01)
 622  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 623  } else {
 624  const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
 625 
 626  PointerType *PtrTy =
 629  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
 630  auto MMO = MF.getMachineMemOperand(PtrInfo,
 634  8, 4);
 635  BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
 637  .addImm(0) // offset
 638  .addImm(0) // glc
 639  .addImm(0) // dlc
 640  .addMemOperand(MMO)
 641  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 642 
 645  }
 646  } else {
 647  Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
 648  Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
 649 
 650  BuildMI(MBB, I, DL, SMovB32, Rsrc0)
 651  .addExternalSymbol("SCRATCH_RSRC_DWORD0")
 652  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 653 
 654  BuildMI(MBB, I, DL, SMovB32, Rsrc1)
 655  .addExternalSymbol("SCRATCH_RSRC_DWORD1")
 656  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 657 
 658  }
 659 
 660  BuildMI(MBB, I, DL, SMovB32, Rsrc2)
 661  .addImm(Rsrc23 & 0xffffffff)
 662  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 663 
 664  BuildMI(MBB, I, DL, SMovB32, Rsrc3)
 665  .addImm(Rsrc23 >> 32)
 666  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 667  }
 668 }
669 
// Predicate over TargetStackID values: returns true/false per stack ID kind,
// and is unreachable for any value outside the enum.
// NOTE(review): doxygen-extracted text — the function signature (original
// line 670, presumably SIFrameLowering::isSupportedStackID) and the case
// labels for the true/false returns (original lines 672-674 and 676) are
// missing from this capture, so which IDs are supported cannot be determined
// here; embedded numbers are original source line numbers, not code.
 671  switch (ID) {
 675  return true;
 677  return false;
 678  }
 679  llvm_unreachable("Invalid TargetStackID::Value");
 680 }
681 
// Non-entry-function prologue: optionally copies the incoming FP to a free
// SGPR, spills SGPR-spill VGPRs to memory with all lanes enabled (via
// S_OR_SAVEEXEC), saves FP to a VGPR lane if a frame index was reserved for
// it, realigns the stack / sets up FP as needed, and bumps SP by the (wave-
// scaled) frame size.
// NOTE(review): doxygen-extracted text — several original lines are missing
// from this capture, including 682 (the function signature), 684/691 (the
// FuncInfo and MRI declarations), 714 (presumably a FrameSetup MI flag), 717
// (the loop header over getSGPRSpillVGPRs), 761-762 (the rest of the dead-
// object assert and the Spill declaration) and 809 (presumably a FrameSetup
// flag). Embedded numbers are original source line numbers, not code.
 683  MachineBasicBlock &MBB) const {
 685  if (FuncInfo->isEntryFunction()) {
 686  emitEntryFunctionPrologue(MF, MBB);
 687  return;
 688  }
 689 
 690  const MachineFrameInfo &MFI = MF.getFrameInfo();
 692  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 693  const SIInstrInfo *TII = ST.getInstrInfo();
 694  const SIRegisterInfo &TRI = TII->getRegisterInfo();
 695 
 696  unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
 697  unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
 698  LivePhysRegs LiveRegs;
 699 
 700  MachineBasicBlock::iterator MBBI = MBB.begin();
 701  DebugLoc DL;
 702 
 703  bool HasFP = false;
 704  uint32_t NumBytes = MFI.getStackSize();
 705  uint32_t RoundedSize = NumBytes;
 706  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
 707  // turn on all lanes before doing the spill to memory.
 708  unsigned ScratchExecCopy = AMDGPU::NoRegister;
 709 
 710  // Emit the copy if we need an FP, and are using a free SGPR to save it.
 711  if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
 712  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
 713  .addReg(FramePtrReg)
 715  }
 716 
 718  : FuncInfo->getSGPRSpillVGPRs()) {
 719  if (!Reg.FI.hasValue())
 720  continue;
 721 
 722  if (ScratchExecCopy == AMDGPU::NoRegister) {
 723  if (LiveRegs.empty()) {
 724  LiveRegs.init(TRI);
 725  LiveRegs.addLiveIns(MBB);
 726  if (FuncInfo->SGPRForFPSaveRestoreCopy)
 727  LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
 728  }
 729 
 730  ScratchExecCopy
 731  = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
 732  *TRI.getWaveMaskRegClass());
 733  assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy);
 734 
 735  const unsigned OrSaveExec = ST.isWave32() ?
 736  AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
 737  BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec),
 738  ScratchExecCopy)
 739  .addImm(-1);
 740  }
 741 
 742  buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
 743  FuncInfo->getScratchRSrcReg(),
 744  StackPtrReg,
 745  Reg.FI.getValue());
 746  }
 747 
 748  if (ScratchExecCopy != AMDGPU::NoRegister) {
 749  // FIXME: Split block and make terminator.
 750  unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
 751  unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
 752  BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
 753  .addReg(ScratchExecCopy, RegState::Kill);
 754  LiveRegs.addReg(ScratchExecCopy);
 755  }
 756 
 757 
 758  if (FuncInfo->FramePointerSaveIndex) {
 759  const int FI = FuncInfo->FramePointerSaveIndex.getValue();
 760  assert(!MFI.isDeadObjectIndex(FI) &&
 763  = FuncInfo->getSGPRToVGPRSpills(FI);
 764  assert(Spill.size() == 1);
 765 
 766  // Save FP before setting it up.
 767  // FIXME: This should respect spillSGPRToVGPR;
 768  BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
 769  Spill[0].VGPR)
 770  .addReg(FramePtrReg)
 771  .addImm(Spill[0].Lane)
 772  .addReg(Spill[0].VGPR, RegState::Undef);
 773  }
 774 
 775  if (TRI.needsStackRealignment(MF)) {
 776  HasFP = true;
 777  const unsigned Alignment = MFI.getMaxAlignment();
 778 
 779  RoundedSize += Alignment;
 780  if (LiveRegs.empty()) {
 781  LiveRegs.init(TRI);
 782  LiveRegs.addLiveIns(MBB);
 783  LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
 784  }
 785 
 786  unsigned ScratchSPReg = findScratchNonCalleeSaveRegister(
 787  MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
 788  assert(ScratchSPReg != AMDGPU::NoRegister &&
 789  ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy);
 790 
 791  // s_add_u32 tmp_reg, s32, NumBytes
 792  // s_and_b32 s32, tmp_reg, 0b111...0000
 793  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
 794  .addReg(StackPtrReg)
 795  .addImm((Alignment - 1) * ST.getWavefrontSize())
 796  .setMIFlag(MachineInstr::FrameSetup);
 797  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
 798  .addReg(ScratchSPReg, RegState::Kill)
 799  .addImm(-Alignment * ST.getWavefrontSize())
 800  .setMIFlag(MachineInstr::FrameSetup);
 801  FuncInfo->setIsStackRealigned(true);
 802  } else if ((HasFP = hasFP(MF))) {
 803  // If we need a base pointer, set it up here. It's whatever the value of
 804  // the stack pointer is at this point. Any variable size objects will be
 805  // allocated after this, so we can still use the base pointer to reference
 806  // locals.
 807  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
 808  .addReg(StackPtrReg)
 810  }
 811 
 812  if (HasFP && RoundedSize != 0) {
 813  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
 814  .addReg(StackPtrReg)
 815  .addImm(RoundedSize * ST.getWavefrontSize())
 816  .setMIFlag(MachineInstr::FrameSetup);
 817  }
 818 
 819  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister ||
 820  FuncInfo->FramePointerSaveIndex)) &&
 821  "Needed to save FP but didn't save it anywhere");
 822 
 823  assert((HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::NoRegister &&
 824  !FuncInfo->FramePointerSaveIndex)) &&
 825  "Saved FP but didn't need it");
 826 }
827 
// Non-entry-function epilogue: undoes the prologue — subtracts the (wave-
// scaled) frame size from SP, restores FP from the SGPR copy or the VGPR
// lane it was written to, and reloads SGPR-spill VGPRs from memory with all
// lanes enabled (mirroring the prologue's S_OR_SAVEEXEC sequence).
// NOTE(review): doxygen-extracted text — several original lines are missing
// from this capture, including 828 (the function signature), 836-837 (the
// MBBI / MRI declarations), 857 (presumably a FrameDestroy MI flag), 863-866
// (asserts and the Spill declaration around the FP reload) and 876 (the loop
// header over getSGPRSpillVGPRs). Embedded numbers are original source line
// numbers, not code.
 829  MachineBasicBlock &MBB) const {
 830  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
 831  if (FuncInfo->isEntryFunction())
 832  return;
 833 
 834  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 835  const SIInstrInfo *TII = ST.getInstrInfo();
 838  LivePhysRegs LiveRegs;
 839  DebugLoc DL;
 840 
 841  const MachineFrameInfo &MFI = MF.getFrameInfo();
 842  uint32_t NumBytes = MFI.getStackSize();
 843  uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
 844  NumBytes + MFI.getMaxAlignment() : NumBytes;
 845 
 846  if (RoundedSize != 0 && hasFP(MF)) {
 847  const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
 848  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
 849  .addReg(StackPtrReg)
 850  .addImm(RoundedSize * ST.getWavefrontSize())
 851  .setMIFlag(MachineInstr::FrameDestroy);
 852  }
 853 
 854  if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
 855  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->getFrameOffsetReg())
 856  .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
 858  }
 859 
 860  if (FuncInfo->FramePointerSaveIndex) {
 861  const int FI = FuncInfo->FramePointerSaveIndex.getValue();
 862 
 865 
 867  = FuncInfo->getSGPRToVGPRSpills(FI);
 868  assert(Spill.size() == 1);
 869  BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
 870  FuncInfo->getFrameOffsetReg())
 871  .addReg(Spill[0].VGPR)
 872  .addImm(Spill[0].Lane);
 873  }
 874 
 875  unsigned ScratchExecCopy = AMDGPU::NoRegister;
 877  : FuncInfo->getSGPRSpillVGPRs()) {
 878  if (!Reg.FI.hasValue())
 879  continue;
 880 
 881  const SIRegisterInfo &TRI = TII->getRegisterInfo();
 882  if (ScratchExecCopy == AMDGPU::NoRegister) {
 883  // See emitPrologue
 884  if (LiveRegs.empty()) {
 885  LiveRegs.init(*ST.getRegisterInfo());
 886  LiveRegs.addLiveOuts(MBB);
 887  LiveRegs.stepBackward(*MBBI);
 888  }
 889 
 890  ScratchExecCopy = findScratchNonCalleeSaveRegister(
 891  MRI, LiveRegs, *TRI.getWaveMaskRegClass());
 892  LiveRegs.removeReg(ScratchExecCopy);
 893 
 894  const unsigned OrSaveExec =
 895  ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
 896 
 897  BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
 898  .addImm(-1);
 899  }
 900 
 901  buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
 902  FuncInfo->getScratchRSrcReg(),
 903  FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
 904  }
 905 
 906  if (ScratchExecCopy != AMDGPU::NoRegister) {
 907  // FIXME: Split block and make terminator.
 908  unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
 909  unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
 910  BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
 911  .addReg(ScratchExecCopy, RegState::Kill);
 912  }
 913 }
914 
915 // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
916 // memory. They should have been removed by now.
917 static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
918  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
919  I != E; ++I) {
920  if (!MFI.isDeadObjectIndex(I))
921  return false;
922  }
923 
924  return true;
925 }
926 
 927 #ifndef NDEBUG
 // Debug-build-only helper: returns false if any live frame object remains,
 // excluding the frame-pointer save slot (FramePointerSaveIndex) itself.
 // NOTE(review): doxygen-extracted text — original line 933 (an additional
 // conjunct in the condition, presumably restricting the check to SGPR-spill
 // stack IDs — TODO confirm) is missing from this capture; embedded numbers
 // are original source line numbers, not code.
 928 static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
 929  Optional<int> FramePointerSaveIndex) {
 930  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
 931  I != E; ++I) {
 932  if (!MFI.isDeadObjectIndex(I) &&
 934  FramePointerSaveIndex && I != FramePointerSaveIndex) {
 935  return false;
 936  }
 937  }
 938 
 939  return true;
 940 }
 941 #endif
942 
944  unsigned &FrameReg) const {
945  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
946 
947  FrameReg = RI->getFrameRegister(MF);
948  return MF.getFrameInfo().getObjectOffset(FI);
949 }
950 
// Late frame finalization: removes dead SGPR-spill frame indices and, if any
// live stack objects remain, registers an emergency scavenging slot with the
// RegScavenger — a fixed object at offset 0 for entry functions, otherwise a
// regular stack object sized/aligned for a 32-bit SGPR spill.
// NOTE(review): doxygen-extracted text — original lines 951 (the start of the
// signature), 958 (the FuncInfo declaration) and 961 (the first line of the
// assert whose message appears on the next line) are missing from this
// capture; embedded numbers are original source line numbers, not code.
 952  MachineFunction &MF,
 953  RegScavenger *RS) const {
 954  MachineFrameInfo &MFI = MF.getFrameInfo();
 955 
 956  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 957  const SIRegisterInfo *TRI = ST.getRegisterInfo();
 959 
 960  FuncInfo->removeDeadFrameIndices(MFI);
 962  "SGPR spill should have been removed in SILowerSGPRSpills");
 963 
 964  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
 965  // but currently hasNonSpillStackObjects is set only from source
 966  // allocas. Stack temps produced from legalization are not counted currently.
 967  if (!allStackObjectsAreDead(MFI)) {
 968  assert(RS && "RegScavenger required if spilling");
 969 
 970  if (FuncInfo->isEntryFunction()) {
 971  int ScavengeFI = MFI.CreateFixedObject(
 972  TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
 973  RS->addScavengingFrameIndex(ScavengeFI);
 974  } else {
 975  int ScavengeFI = MFI.CreateStackObject(
 976  TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
 977  TRI->getSpillAlignment(AMDGPU::SGPR_32RegClass),
 978  false);
 979  RS->addScavengingFrameIndex(ScavengeFI);
 980  }
 981  }
 982 }
983 
984 // Only report VGPRs to generic code.
986  BitVector &SavedVGPRs,
987  RegScavenger *RS) const {
988  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
990  if (MFI->isEntryFunction())
991  return;
992 
993  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
994  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
995  const SIRegisterInfo *TRI = ST.getRegisterInfo();
996 
997  // Ignore the SGPRs the default implementation found.
998  SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask());
999 
1000  // hasFP only knows about stack objects that already exist. We're now
1001  // determining the stack slots that will be created, so we have to predict
1002  // them. Stack objects force FP usage with calls.
1003  //
1004  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1005  // don't want to report it here.
1006  //
1007  // FIXME: Is this really hasReservedCallFrame?
1008  const bool WillHaveFP =
1009  FrameInfo.hasCalls() &&
1010  (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1011 
1012  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
1013  // so don't allow the default insertion to handle them.
1014  for (auto SSpill : MFI->getSGPRSpillVGPRs())
1015  SavedVGPRs.reset(SSpill.VGPR);
1016 
1017  const bool HasFP = WillHaveFP || hasFP(MF);
1018  if (!HasFP)
1019  return;
1020 
1021  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
1022  int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
1024 
1025  // If there is already a VGPR with free lanes, use it. We may already have
1026  // to pay the penalty for spilling a CSR VGPR.
1027  if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
1028  llvm_unreachable("allocate SGPR spill should have worked");
1029 
1030  MFI->FramePointerSaveIndex = NewFI;
1031 
1032  LLVM_DEBUG(
1033  auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
1034  dbgs() << "Spilling FP to " << printReg(Spill.VGPR, TRI)
1035  << ':' << Spill.Lane << '\n');
1036  return;
1037  }
1038 
1040 
1041  if (!MFI->SGPRForFPSaveRestoreCopy) {
1042  // There's no free lane to spill, and no free register to save FP, so we're
1043  // forced to spill another VGPR to use for the spill.
1044  int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
1046  if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
1047  llvm_unreachable("allocate SGPR spill should have worked");
1048  MFI->FramePointerSaveIndex = NewFI;
1049 
1050  LLVM_DEBUG(
1051  auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
1052  dbgs() << "FP requires fallback spill to " << printReg(Spill.VGPR, TRI)
1053  << ':' << Spill.Lane << '\n';);
1054  } else {
1055  LLVM_DEBUG(dbgs() << "Saving FP with copy to " <<
1056  printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n');
1057  }
1058 }
1059 
1061  BitVector &SavedRegs,
1062  RegScavenger *RS) const {
1063  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1065  if (MFI->isEntryFunction())
1066  return;
1067 
1068  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1069  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1070 
1071  // The SP is specifically managed and we don't want extra spills of it.
1072  SavedRegs.reset(MFI->getStackPtrOffsetReg());
1073  SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
1074 }
1075 
1077  MachineFunction &MF, const TargetRegisterInfo *TRI,
1078  std::vector<CalleeSavedInfo> &CSI) const {
1079  if (CSI.empty())
1080  return true; // Early exit if no callee saved registers are modified!
1081 
1082  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1083  if (!FuncInfo->SGPRForFPSaveRestoreCopy)
1084  return false;
1085 
1086  for (auto &CS : CSI) {
1087  if (CS.getReg() == FuncInfo->getFrameOffsetReg()) {
1088  if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister)
1089  CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
1090  break;
1091  }
1092  }
1093 
1094  return false;
1095 }
1096 
1098  MachineFunction &MF,
1099  MachineBasicBlock &MBB,
1100  MachineBasicBlock::iterator I) const {
1101  int64_t Amount = I->getOperand(0).getImm();
1102  if (Amount == 0)
1103  return MBB.erase(I);
1104 
1105  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1106  const SIInstrInfo *TII = ST.getInstrInfo();
1107  const DebugLoc &DL = I->getDebugLoc();
1108  unsigned Opc = I->getOpcode();
1109  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
1110  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
1111 
1112  if (!hasReservedCallFrame(MF)) {
1113  unsigned Align = getStackAlignment();
1114 
1115  Amount = alignTo(Amount, Align);
1116  assert(isUInt<32>(Amount) && "exceeded stack address space size");
1118  unsigned SPReg = MFI->getStackPtrOffsetReg();
1119 
1120  unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
1121  BuildMI(MBB, I, DL, TII->get(Op), SPReg)
1122  .addReg(SPReg)
1123  .addImm(Amount * ST.getWavefrontSize());
1124  } else if (CalleePopAmount != 0) {
1125  llvm_unreachable("is this used?");
1126  }
1127 
1128  return MBB.erase(I);
1129 }
1130 
1132  const MachineFrameInfo &MFI = MF.getFrameInfo();
1133  if (MFI.hasCalls()) {
1134  // All offsets are unsigned, so need to be addressed in the same direction
1135  // as stack growth.
1136 
1137  // FIXME: This function is pretty broken, since it can be called before the
1138  // frame layout is determined or CSR spills are inserted.
1139  if (MFI.getStackSize() != 0)
1140  return true;
1141 
1142  // For the entry point, the input wave scratch offset must be copied to the
1143  // API SP if there are calls.
1145  return true;
1146  }
1147 
1148  return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
1149  MFI.hasStackMap() || MFI.hasPatchPoint() ||
1150  MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
1152 }
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:385
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
Definition: BitVector.h:781
int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
Interface definition for SIRegisterInfo.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed...
AMDGPU specific subclass of TargetSubtarget.
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void addLiveIn(unsigned Reg, unsigned vreg=0)
addLiveIn - Add the specified register as a live-in.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:179
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
unsigned Reg
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:637
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
const SIInstrInfo * getInstrInfo() const override
bool hasMergedShaders() const
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
bool isSupportedStackID(TargetStackID::Value ID) const override
F(f)
void setIsStackRealigned(bool Realigned=true)
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:181
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void removeDeadFrameIndices(MachineFrameInfo &MFI)
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:171
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const SIInstrInfo *TII, unsigned SpillReg, unsigned ScratchRsrcReg, unsigned SPReg, int FI)
bool empty() const
Returns true if the set is empty.
Definition: LivePhysRegs.h:76
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
A description of a memory reference used in the backend.
bool isMesaGfxShader(const Function &F) const
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
const HexagonInstrInfo * TII
uint64_t getScratchRsrcWords23() const
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:450
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
ArrayRef< SpilledReg > getSGPRToVGPRSpills(int FrameIndex) const
The memory access is dereferenceable (i.e., doesn't trap).
static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI, Optional< int > FramePointerSaveIndex)
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const
returns true if NumLanes slots are available in VGPRs already used for SGPR spilling.
void setStackPtrOffsetReg(unsigned Reg)
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
int getObjectIndexBegin() const
Return the minimum frame object index.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:196
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
Class to represent pointers.
Definition: DerivedTypes.h:579
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:19
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isCompute(CallingConv::ID cc)
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every &#39;0&#39; bit in Mask.
Definition: BitVector.h:793
This file declares the machine register scavenger class.
const TargetRegisterInfo * getTargetRegisterInfo() const
unsigned const MachineRegisterInfo * MRI
unsigned reservedPrivateSegmentWaveByteOffsetReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch wave offset in case spilling is needed...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
void addLiveIns(const MachineBasicBlock &MBB)
Adds all live-in registers of basic block MBB.
static ArrayRef< MCPhysReg > getAllSGPR128(const GCNSubtarget &ST, const MachineFunction &MF)
bool any() const
any - Returns true if any bit is set.
Definition: BitVector.h:180
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
Definition: LivePhysRegs.h:66
void stepBackward(const MachineInstr &MI)
Simulates liveness when stepping backwards over an instruction(bundle).
static ArrayRef< MCPhysReg > getAllSGPRs(const GCNSubtarget &ST, const MachineFunction &MF)
BitVector & reset()
Definition: BitVector.h:438
unsigned getMaxAlignment() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Generation getGeneration() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1446
ArrayRef< SGPRSpillVGPRCSR > getSGPRSpillVGPRs() const
void setScratchWaveOffsetReg(unsigned Reg)
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const MachineBasicBlock & front() const
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
This class contains a discriminated union of information about pointers in memory operands...
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required, we reserve argument space for call sites in the function immediately on entry to the current function.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
The memory access writes data.
unsigned getWavefrontSize() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
unsigned getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:202
bool hasSGPRInitBug() const
ArrayRef< T > drop_back(size_t N=1) const
Drop the last N elements of the array.
Definition: ArrayRef.h:193
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
Address space for constant memory (VTX2).
Definition: AMDGPU.h:273
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array...
Definition: ArrayRef.h:178
void replaceRegWith(unsigned FromReg, unsigned ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
bool flatScratchIsPointer() const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Provides AMDGPU specific target descriptions.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:163
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:48
static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const SIInstrInfo *TII, unsigned SpillReg, unsigned ScratchRsrcReg, unsigned SPReg, int FI)
TargetOptions Options
unsigned SGPRForFPSaveRestoreCopy
If this is set, an SGPR used for save/restore of the register used for the frame pointer.
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned getImplicitBufferPtrUserSGPR() const
The memory access always returns the same value (or traps).
bool isAmdHsaOrMesa(const Function &F) const
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const TargetRegisterClass * getWaveMaskRegClass() const
uint8_t getStackID(int ObjectIdx) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Register getFrameRegister(const MachineFunction &MF) const override
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:212
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it...
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
void addReg(MCPhysReg Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:79
static unsigned findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, const TargetRegisterClass &RC, bool Unused=false)
void removeReg(MCPhysReg Reg)
Removes a physical register, all its sub-registers, and all its super-registers from the set...
Definition: LivePhysRegs.h:89
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register...
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool hasCalls() const
Return true if the current function has any function calls.
Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
const SIRegisterInfo * getRegisterInfo() const override