Bug Summary

File:llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
Warning:line 372, column 9
Value stored to 'HandledScratchWaveOffsetReg' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SIFrameLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/include -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include 
-internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/lib/Target/AMDGPU -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-12-05-225554-32688-1 -x c++ /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
1//===----------------------- SIFrameLowering.cpp --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8
9#include "SIFrameLowering.h"
10#include "AMDGPUSubtarget.h"
11#include "SIInstrInfo.h"
12#include "SIMachineFunctionInfo.h"
13#include "SIRegisterInfo.h"
14#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
15
16#include "llvm/CodeGen/LivePhysRegs.h"
17#include "llvm/CodeGen/MachineFrameInfo.h"
18#include "llvm/CodeGen/MachineFunction.h"
19#include "llvm/CodeGen/MachineInstrBuilder.h"
20#include "llvm/CodeGen/RegisterScavenging.h"
21
22using namespace llvm;
23
24#define DEBUG_TYPE "frame-info"
25
26
27static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST,
28 const MachineFunction &MF) {
29 return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
30 ST.getMaxNumSGPRs(MF) / 4);
31}
32
33static ArrayRef<MCPhysReg> getAllSGPRs(const GCNSubtarget &ST,
34 const MachineFunction &MF) {
35 return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
36 ST.getMaxNumSGPRs(MF));
37}
38
39// Find a scratch register that we can use at the start of the prologue to
40// re-align the stack pointer. We avoid using callee-save registers since they
41// may appear to be free when this is called from canUseAsPrologue (during
42// shrink wrapping), but then no longer be free when this is called from
43// emitPrologue.
44//
45// FIXME: This is a bit conservative, since in the above case we could use one
46// of the callee-save registers as a scratch temp to re-align the stack pointer,
47// but we would then have to make sure that we were in fact saving at least one
48// callee-save register in the prologue, which is additional complexity that
49// doesn't seem worth the benefit.
50static unsigned findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
51 LivePhysRegs &LiveRegs,
52 const TargetRegisterClass &RC,
53 bool Unused = false) {
54 // Mark callee saved registers as used so we will not choose them.
55 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
56 for (unsigned i = 0; CSRegs[i]; ++i)
57 LiveRegs.addReg(CSRegs[i]);
58
59 if (Unused) {
60 // We are looking for a register that can be used throughout the entire
61 // function, so any use is unacceptable.
62 for (unsigned Reg : RC) {
63 if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
64 return Reg;
65 }
66 } else {
67 for (unsigned Reg : RC) {
68 if (LiveRegs.available(MRI, Reg))
69 return Reg;
70 }
71 }
72
73 // If we require an unused register, this is used in contexts where failure is
74 // an option and has an alternative plan. In other contexts, this must
75 // succeed0.
76 if (!Unused)
77 report_fatal_error("failed to find free scratch register");
78
79 return AMDGPU::NoRegister;
80}
81
82static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI) {
83 LivePhysRegs LiveRegs;
84 LiveRegs.init(*MRI.getTargetRegisterInfo());
85 return findScratchNonCalleeSaveRegister(
86 MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
87}
88
89// We need to specially emit stack operations here because a different frame
90// register is used than in the rest of the function, as getFrameRegister would
91// use.
92static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
93 MachineBasicBlock::iterator I,
94 const SIInstrInfo *TII, unsigned SpillReg,
95 unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
96 MachineFunction *MF = MBB.getParent();
97 MachineFrameInfo &MFI = MF->getFrameInfo();
98
99 int64_t Offset = MFI.getObjectOffset(FI);
100
101 MachineMemOperand *MMO = MF->getMachineMemOperand(
102 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
103 MFI.getObjectAlignment(FI));
104
105 if (isUInt<12>(Offset)) {
106 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
107 .addReg(SpillReg, RegState::Kill)
108 .addReg(ScratchRsrcReg)
109 .addReg(SPReg)
110 .addImm(Offset)
111 .addImm(0) // glc
112 .addImm(0) // slc
113 .addImm(0) // tfe
114 .addImm(0) // dlc
115 .addImm(0) // swz
116 .addMemOperand(MMO);
117 return;
118 }
119
120 MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
121 MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
122
123 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
124 .addImm(Offset);
125
126 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
127 .addReg(SpillReg, RegState::Kill)
128 .addReg(OffsetReg, RegState::Kill)
129 .addReg(ScratchRsrcReg)
130 .addReg(SPReg)
131 .addImm(0)
132 .addImm(0) // glc
133 .addImm(0) // slc
134 .addImm(0) // tfe
135 .addImm(0) // dlc
136 .addImm(0) // swz
137 .addMemOperand(MMO);
138}
139
140static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
141 MachineBasicBlock::iterator I,
142 const SIInstrInfo *TII, unsigned SpillReg,
143 unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
144 MachineFunction *MF = MBB.getParent();
145 MachineFrameInfo &MFI = MF->getFrameInfo();
146 int64_t Offset = MFI.getObjectOffset(FI);
147
148 MachineMemOperand *MMO = MF->getMachineMemOperand(
149 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4,
150 MFI.getObjectAlignment(FI));
151
152 if (isUInt<12>(Offset)) {
153 BuildMI(MBB, I, DebugLoc(),
154 TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
155 .addReg(ScratchRsrcReg)
156 .addReg(SPReg)
157 .addImm(Offset)
158 .addImm(0) // glc
159 .addImm(0) // slc
160 .addImm(0) // tfe
161 .addImm(0) // dlc
162 .addImm(0) // swz
163 .addMemOperand(MMO);
164 return;
165 }
166
167 MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
168 MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
169
170 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
171 .addImm(Offset);
172
173 BuildMI(MBB, I, DebugLoc(),
174 TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
175 .addReg(OffsetReg, RegState::Kill)
176 .addReg(ScratchRsrcReg)
177 .addReg(SPReg)
178 .addImm(0)
179 .addImm(0) // glc
180 .addImm(0) // slc
181 .addImm(0) // tfe
182 .addImm(0) // dlc
183 .addImm(0) // swz
184 .addMemOperand(MMO);
185}
186
187void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
188 MachineFunction &MF,
189 MachineBasicBlock &MBB) const {
190 const SIInstrInfo *TII = ST.getInstrInfo();
191 const SIRegisterInfo* TRI = &TII->getRegisterInfo();
192 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
193
194 // We don't need this if we only have spills since there is no user facing
195 // scratch.
196
197 // TODO: If we know we don't have flat instructions earlier, we can omit
198 // this from the input registers.
199 //
200 // TODO: We only need to know if we access scratch space through a flat
201 // pointer. Because we only detect if flat instructions are used at all,
202 // this will be used more often than necessary on VI.
203
204 // Debug location must be unknown since the first debug location is used to
205 // determine the end of the prologue.
206 DebugLoc DL;
207 MachineBasicBlock::iterator I = MBB.begin();
208
209 Register FlatScratchInitReg =
210 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
211
212 MachineRegisterInfo &MRI = MF.getRegInfo();
213 MRI.addLiveIn(FlatScratchInitReg);
214 MBB.addLiveIn(FlatScratchInitReg);
215
216 Register FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
217 Register FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
218
219 unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
220
221 // Do a 64-bit pointer add.
222 if (ST.flatScratchIsPointer()) {
223 if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
224 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
225 .addReg(FlatScrInitLo)
226 .addReg(ScratchWaveOffsetReg);
227 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
228 .addReg(FlatScrInitHi)
229 .addImm(0);
230 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
231 addReg(FlatScrInitLo).
232 addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
233 (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
234 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
235 addReg(FlatScrInitHi).
236 addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
237 (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
238 return;
239 }
240
241 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
242 .addReg(FlatScrInitLo)
243 .addReg(ScratchWaveOffsetReg);
244 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
245 .addReg(FlatScrInitHi)
246 .addImm(0);
247
248 return;
249 }
250
251 assert(ST.getGeneration() < AMDGPUSubtarget::GFX10)((ST.getGeneration() < AMDGPUSubtarget::GFX10) ? static_cast
<void> (0) : __assert_fail ("ST.getGeneration() < AMDGPUSubtarget::GFX10"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 251, __PRETTY_FUNCTION__))
;
252
253 // Copy the size in bytes.
254 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
255 .addReg(FlatScrInitHi, RegState::Kill);
256
257 // Add wave offset in bytes to private base offset.
258 // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
259 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
260 .addReg(FlatScrInitLo)
261 .addReg(ScratchWaveOffsetReg);
262
263 // Convert offset to 256-byte units.
264 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
265 .addReg(FlatScrInitLo, RegState::Kill)
266 .addImm(8);
267}
268
269unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
270 const GCNSubtarget &ST,
271 const SIInstrInfo *TII,
272 const SIRegisterInfo *TRI,
273 SIMachineFunctionInfo *MFI,
274 MachineFunction &MF) const {
275 MachineRegisterInfo &MRI = MF.getRegInfo();
276
277 // We need to insert initialization of the scratch resource descriptor.
278 unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
279 if (ScratchRsrcReg == AMDGPU::NoRegister ||
280 !MRI.isPhysRegUsed(ScratchRsrcReg))
281 return AMDGPU::NoRegister;
282
283 if (ST.hasSGPRInitBug() ||
284 ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
285 return ScratchRsrcReg;
286
287 // We reserved the last registers for this. Shift it down to the end of those
288 // which were actually used.
289 //
290 // FIXME: It might be safer to use a pseudoregister before replacement.
291
292 // FIXME: We should be able to eliminate unused input registers. We only
293 // cannot do this for the resources required for scratch access. For now we
294 // skip over user SGPRs and may leave unused holes.
295
296 // We find the resource first because it has an alignment requirement.
297
298 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
299 ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
300 AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
301
302 // Skip the last N reserved elements because they should have already been
303 // reserved for VCC etc.
304 for (MCPhysReg Reg : AllSGPR128s) {
305 // Pick the first unallocated one. Make sure we don't clobber the other
306 // reserved input we needed.
307 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
308 MRI.replaceRegWith(ScratchRsrcReg, Reg);
309 MFI->setScratchRSrcReg(Reg);
310 return Reg;
311 }
312 }
313
314 return ScratchRsrcReg;
315}
316
317// Shift down registers reserved for the scratch wave offset.
318std::pair<unsigned, bool>
319SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
320 const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
321 SIMachineFunctionInfo *MFI, MachineFunction &MF) const {
322 MachineRegisterInfo &MRI = MF.getRegInfo();
323 unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
324
325 assert(MFI->isEntryFunction())((MFI->isEntryFunction()) ? static_cast<void> (0) : __assert_fail
("MFI->isEntryFunction()", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 325, __PRETTY_FUNCTION__))
;
326
327 // No replacement necessary.
328 if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
329 (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) {
330 return std::make_pair(AMDGPU::NoRegister, false);
331 }
332
333 if (ST.hasSGPRInitBug())
334 return std::make_pair(ScratchWaveOffsetReg, false);
335
336 unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
337
338 ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
339 if (NumPreloaded > AllSGPRs.size())
340 return std::make_pair(ScratchWaveOffsetReg, false);
341
342 AllSGPRs = AllSGPRs.slice(NumPreloaded);
343
344 // We need to drop register from the end of the list that we cannot use
345 // for the scratch wave offset.
346 // + 2 s102 and s103 do not exist on VI.
347 // + 2 for vcc
348 // + 2 for xnack_mask
349 // + 2 for flat_scratch
350 // + 4 for registers reserved for scratch resource register
351 // + 1 for register reserved for scratch wave offset. (By exluding this
352 // register from the list to consider, it means that when this
353 // register is being used for the scratch wave offset and there
354 // are no other free SGPRs, then the value will stay in this register.
355 // + 1 if stack pointer is used.
356 // ----
357 // 13 (+1)
358 unsigned ReservedRegCount = 13;
359
360 if (AllSGPRs.size() < ReservedRegCount)
361 return std::make_pair(ScratchWaveOffsetReg, false);
362
363 bool HandledScratchWaveOffsetReg =
364 ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
365 bool FPAdjusted = false;
366
367 for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
368 // Pick the first unallocated SGPR. Be careful not to pick an alias of the
369 // scratch descriptor, since we haven’t added its uses yet.
370 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
371 if (!HandledScratchWaveOffsetReg) {
372 HandledScratchWaveOffsetReg = true;
Value stored to 'HandledScratchWaveOffsetReg' is never read
373
374 MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
375 if (MFI->getScratchWaveOffsetReg() == MFI->getStackPtrOffsetReg()) {
376 assert(!hasFP(MF))((!hasFP(MF)) ? static_cast<void> (0) : __assert_fail (
"!hasFP(MF)", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 376, __PRETTY_FUNCTION__))
;
377 MFI->setStackPtrOffsetReg(Reg);
378 }
379
380 MFI->setScratchWaveOffsetReg(Reg);
381 MFI->setFrameOffsetReg(Reg);
382 ScratchWaveOffsetReg = Reg;
383 FPAdjusted = true;
384 break;
385 }
386 }
387 }
388
389 return std::make_pair(ScratchWaveOffsetReg, FPAdjusted);
390}
391
// Emits the prologue of an entry function at the top of MBB, which is asserted
// to be the first block of MF. In order, this:
//  - emits the flat scratch setup when MFI reports hasFlatScratchInit();
//  - lets the getReserved* helpers pick the final scratch resource and scratch
//    wave offset registers;
//  - re-adds the preloaded input registers as live-ins and makes the selected
//    registers live-in to every other block;
//  - copies the preloaded inputs into the selected registers;
//  - emits the scratch resource descriptor setup when that register is used;
//  - initializes the stack pointer when the function has a frame pointer.
392void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
393 MachineBasicBlock &MBB) const {
394 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported")((&MF.front() == &MBB && "Shrink-wrapping not yet supported"
) ? static_cast<void> (0) : __assert_fail ("&MF.front() == &MBB && \"Shrink-wrapping not yet supported\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 394, __PRETTY_FUNCTION__))
;
395
396 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
397
398 // If we only have SGPR spills, we won't actually be using scratch memory
399 // since these spill to VGPRs.
400 //
401 // FIXME: We should be cleaning up these unused SGPR spill frame indices
402 // somewhere.
403
404 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
405 const SIInstrInfo *TII = ST.getInstrInfo();
406 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
407 MachineRegisterInfo &MRI = MF.getRegInfo();
408 const Function &F = MF.getFunction();
409
410 // We need to do the replacement of the private segment buffer and wave offset
411 // register even if there are no stack objects. There could be stores to undef
412 // or a constant without an associated object.
413
414 // FIXME: We still have implicit uses on SGPR spill instructions in case they
415 // need to spill to vector memory. It's likely that will not happen, but at
416 // this point it appears we need the setup. This part of the prolog should be
417 // emitted after frame indices are eliminated.
418
419 if (MFI->hasFlatScratchInit())
420 emitFlatScratchInit(ST, MF, MBB);
421
// Both helpers below may rewrite register uses in MRI and update MFI as a side
// effect, so they run before any of the registers are inspected.
422 unsigned ScratchRsrcReg
423 = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
424
425 unsigned ScratchWaveOffsetReg;
426 bool FPAdjusted;
427 std::tie(ScratchWaveOffsetReg, FPAdjusted) =
428 getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
429
430 // We need to insert initialization of the scratch resource descriptor.
431 Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
432 AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
433
434 unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
435 if (ST.isAmdHsaOrMesa(F)) {
436 PreloadedPrivateBufferReg = MFI->getPreloadedReg(
437 AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
438 }
439
440 bool OffsetRegUsed = ScratchWaveOffsetReg != AMDGPU::NoRegister &&
441 MRI.isPhysRegUsed(ScratchWaveOffsetReg);
442 bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
443 MRI.isPhysRegUsed(ScratchRsrcReg);
444
445 // FIXME: Hack to not crash in situations which emitted an error.
// Note that this returns after the flat scratch setup and the register
// replacement above have already taken effect.
446 if (PreloadedScratchWaveOffsetReg == AMDGPU::NoRegister)
447 return;
448
449 // We added live-ins during argument lowering, but since they were not used
450 // they were deleted. We're adding the uses now, so add them back.
451 MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
452 MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
453
454 if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
455 assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F))((ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F)) ? static_cast
<void> (0) : __assert_fail ("ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F)"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 455, __PRETTY_FUNCTION__))
;
456 MRI.addLiveIn(PreloadedPrivateBufferReg);
457 MBB.addLiveIn(PreloadedPrivateBufferReg);
458 }
459
460 // Make the register selected live throughout the function.
461 for (MachineBasicBlock &OtherBB : MF) {
462 if (&OtherBB == &MBB)
463 continue;
464
465 if (OffsetRegUsed || FPAdjusted)
466 OtherBB.addLiveIn(ScratchWaveOffsetReg);
467
468 if (ResourceRegUsed)
469 OtherBB.addLiveIn(ScratchRsrcReg);
470 }
471
472 DebugLoc DL;
473 MachineBasicBlock::iterator I = MBB.begin();
474
475 // If we reserved the original input registers, we don't need to copy to the
476 // reserved registers.
477
478 bool CopyBuffer = ResourceRegUsed &&
479 PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
480 ST.isAmdHsaOrMesa(F) &&
481 ScratchRsrcReg != PreloadedPrivateBufferReg;
482
483 // This needs to be careful of the copying order to avoid overwriting one of
484 // the input registers before it's been copied to its final
485 // destination. Usually the offset should be copied first.
486 bool CopyBufferFirst = TRI->isSubRegisterEq(PreloadedPrivateBufferReg,
487 ScratchWaveOffsetReg);
488 if (CopyBuffer && CopyBufferFirst) {
489 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
490 .addReg(PreloadedPrivateBufferReg, RegState::Kill);
491 }
492
493 unsigned SPReg = MFI->getStackPtrOffsetReg();
494 assert(SPReg != AMDGPU::SP_REG)((SPReg != AMDGPU::SP_REG) ? static_cast<void> (0) : __assert_fail
("SPReg != AMDGPU::SP_REG", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 494, __PRETTY_FUNCTION__))
;
495
496 // FIXME: Remove the isPhysRegUsed checks
497 const bool HasFP = hasFP(MF);
498
// The preloaded wave offset is marked killed only when a frame pointer is
// needed.
499 if (HasFP || OffsetRegUsed) {
500 assert(ScratchWaveOffsetReg)((ScratchWaveOffsetReg) ? static_cast<void> (0) : __assert_fail
("ScratchWaveOffsetReg", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 500, __PRETTY_FUNCTION__))
;
501 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
502 .addReg(PreloadedScratchWaveOffsetReg, HasFP ? RegState::Kill : 0);
503 }
504
505 if (CopyBuffer && !CopyBufferFirst) {
506 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
507 .addReg(PreloadedPrivateBufferReg, RegState::Kill);
508 }
509
510 if (ResourceRegUsed) {
511 emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I,
512 PreloadedPrivateBufferReg, ScratchRsrcReg);
513 }
514
515 if (HasFP) {
// NOTE(review): this DL shadows the function-scope DL declared above; both are
// default-constructed.
516 DebugLoc DL;
517 const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
518 int64_t StackSize = FrameInfo.getStackSize();
519
520 // On kernel entry, the private scratch wave offset is the SP value.
// With a non-empty frame, SP starts at the wave offset plus the stack size
// multiplied by the wavefront size.
521 if (StackSize == 0) {
522 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SPReg)
523 .addReg(MFI->getScratchWaveOffsetReg());
524 } else {
525 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
526 .addReg(MFI->getScratchWaveOffsetReg())
527 .addImm(StackSize * ST.getWavefrontSize());
528 }
529 }
530}
531
532// Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
//
// Instructions are inserted into MBB before I and fill in ScratchRsrcReg:
//  - on AMDPAL, the low half is pointed at the GIT and the descriptor is then
//    loaded from it with S_LOAD_DWORDX4_IMM;
//  - for a Mesa graphics shader, or when PreloadedPrivateBufferReg is
//    NoRegister, dwords 0-1 come from the implicit buffer pointer or from the
//    SCRATCH_RSRC_DWORD0/1 external symbols, and dwords 2-3 from
//    TII->getScratchRsrcWords23();
//  - in every other case nothing is emitted.
533void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
534 MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI,
535 MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg,
536 unsigned ScratchRsrcReg) const {
537
538 const SIInstrInfo *TII = ST.getInstrInfo();
539 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
540 const Function &Fn = MF.getFunction();
541 DebugLoc DL;
542
543 if (ST.isAmdPalOS()) {
544 // The pointer to the GIT is formed from the offset passed in and either
545 // the amdgpu-git-ptr-high function attribute or the top part of the PC
546 Register RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
547 Register RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
548 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
549
550 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
551
// A GITPtrHigh value of 0xffffffff selects the S_GETPC_B64 path instead of an
// explicit high half.
552 if (MFI->getGITPtrHigh() != 0xffffffff) {
553 BuildMI(MBB, I, DL, SMovB32, RsrcHi)
554 .addImm(MFI->getGITPtrHigh())
555 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
556 } else {
557 const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
558 BuildMI(MBB, I, DL, GetPC64, Rsrc01);
559 }
560 auto GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
561 if (ST.hasMergedShaders()) {
562 switch (MF.getFunction().getCallingConv()) {
563 case CallingConv::AMDGPU_HS:
564 case CallingConv::AMDGPU_GS:
565 // Low GIT address is passed in s8 rather than s0 for an LS+HS or
566 // ES+GS merged shader on gfx9+.
567 GitPtrLo = AMDGPU::SGPR8;
568 break;
569 default:
570 break;
571 }
572 }
573 MF.getRegInfo().addLiveIn(GitPtrLo);
574 MBB.addLiveIn(GitPtrLo);
575 BuildMI(MBB, I, DL, SMovB32, RsrcLo)
576 .addReg(GitPtrLo)
577 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
578
579 // We now have the GIT ptr - now get the scratch descriptor from the entry
580 // at offset 0 (or offset 16 for a compute shader).
581 PointerType *PtrTy =
582 PointerType::get(Type::getInt64Ty(MF.getFunction().getContext()),
583 AMDGPUAS::CONSTANT_ADDRESS);
584 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
585 const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
586 auto MMO = MF.getMachineMemOperand(PtrInfo,
587 MachineMemOperand::MOLoad |
588 MachineMemOperand::MOInvariant |
589 MachineMemOperand::MODereferenceable,
590 16, 4);
591 unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
// NOTE(review): Subtarget is fetched from MF again although ST is already in
// scope; presumably both refer to the same subtarget - confirm.
592 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
593 unsigned EncodedOffset = AMDGPU::getSMRDEncodedOffset(Subtarget, Offset);
594 BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
595 .addReg(Rsrc01)
596 .addImm(EncodedOffset) // offset
597 .addImm(0) // glc
598 .addImm(0) // dlc
599 .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
600 .addMemOperand(MMO);
601 return;
602 }
603 if (ST.isMesaGfxShader(Fn)
604 || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) {
605 assert(!ST.isAmdHsaOrMesa(Fn))((!ST.isAmdHsaOrMesa(Fn)) ? static_cast<void> (0) : __assert_fail
("!ST.isAmdHsaOrMesa(Fn)", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 605, __PRETTY_FUNCTION__))
;
606 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
607
608 Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
609 Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
610
611 // Use relocations to get the pointer, and setup the other bits manually.
612 uint64_t Rsrc23 = TII->getScratchRsrcWords23();
613
614 if (MFI->hasImplicitBufferPtr()) {
615 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
616
// Compute calling conventions move the implicit buffer pointer SGPR pair
// directly; all others load the low two dwords through it.
617 if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
618 const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
619
620 BuildMI(MBB, I, DL, Mov64, Rsrc01)
621 .addReg(MFI->getImplicitBufferPtrUserSGPR())
622 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
623 } else {
624 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
625
626 PointerType *PtrTy =
627 PointerType::get(Type::getInt64Ty(MF.getFunction().getContext()),
628 AMDGPUAS::CONSTANT_ADDRESS);
629 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
630 auto MMO = MF.getMachineMemOperand(PtrInfo,
631 MachineMemOperand::MOLoad |
632 MachineMemOperand::MOInvariant |
633 MachineMemOperand::MODereferenceable,
634 8, 4);
635 BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
636 .addReg(MFI->getImplicitBufferPtrUserSGPR())
637 .addImm(0) // offset
638 .addImm(0) // glc
639 .addImm(0) // dlc
640 .addMemOperand(MMO)
641 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
642
643 MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
644 MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
645 }
646 } else {
647 Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
648 Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
649
650 BuildMI(MBB, I, DL, SMovB32, Rsrc0)
651 .addExternalSymbol("SCRATCH_RSRC_DWORD0")
652 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
653
654 BuildMI(MBB, I, DL, SMovB32, Rsrc1)
655 .addExternalSymbol("SCRATCH_RSRC_DWORD1")
656 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
657
658 }
659
// Dwords 2 and 3 are the low and high 32 bits of Rsrc23.
660 BuildMI(MBB, I, DL, SMovB32, Rsrc2)
661 .addImm(Rsrc23 & 0xffffffff)
662 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
663
664 BuildMI(MBB, I, DL, SMovB32, Rsrc3)
665 .addImm(Rsrc23 >> 32)
666 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
667 }
668}
669
670bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
671 switch (ID) {
672 case TargetStackID::Default:
673 case TargetStackID::NoAlloc:
674 case TargetStackID::SGPRSpill:
675 return true;
676 case TargetStackID::SVEVector:
677 return false;
678 }
679 llvm_unreachable("Invalid TargetStackID::Value")::llvm::llvm_unreachable_internal("Invalid TargetStackID::Value"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 679)
;
680}
681
682void SIFrameLowering::emitPrologue(MachineFunction &MF,
683 MachineBasicBlock &MBB) const {
684 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
685 if (FuncInfo->isEntryFunction()) {
686 emitEntryFunctionPrologue(MF, MBB);
687 return;
688 }
689
690 const MachineFrameInfo &MFI = MF.getFrameInfo();
691 MachineRegisterInfo &MRI = MF.getRegInfo();
692 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
693 const SIInstrInfo *TII = ST.getInstrInfo();
694 const SIRegisterInfo &TRI = TII->getRegisterInfo();
695
696 unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
697 unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
698 LivePhysRegs LiveRegs;
699
700 MachineBasicBlock::iterator MBBI = MBB.begin();
701 DebugLoc DL;
702
703 bool HasFP = false;
704 uint32_t NumBytes = MFI.getStackSize();
705 uint32_t RoundedSize = NumBytes;
706 // To avoid clobbering VGPRs in lanes that weren't active on function entry,
707 // turn on all lanes before doing the spill to memory.
708 unsigned ScratchExecCopy = AMDGPU::NoRegister;
709
710 // Emit the copy if we need an FP, and are using a free SGPR to save it.
711 if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
712 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
713 .addReg(FramePtrReg)
714 .setMIFlag(MachineInstr::FrameSetup);
715 }
716
717 for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
718 : FuncInfo->getSGPRSpillVGPRs()) {
719 if (!Reg.FI.hasValue())
720 continue;
721
722 if (ScratchExecCopy == AMDGPU::NoRegister) {
723 if (LiveRegs.empty()) {
724 LiveRegs.init(TRI);
725 LiveRegs.addLiveIns(MBB);
726 if (FuncInfo->SGPRForFPSaveRestoreCopy)
727 LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
728 }
729
730 ScratchExecCopy
731 = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
732 *TRI.getWaveMaskRegClass());
733 assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy)((FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy) ?
static_cast<void> (0) : __assert_fail ("FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 733, __PRETTY_FUNCTION__))
;
734
735 const unsigned OrSaveExec = ST.isWave32() ?
736 AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
737 BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec),
738 ScratchExecCopy)
739 .addImm(-1);
740 }
741
742 buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
743 FuncInfo->getScratchRSrcReg(),
744 StackPtrReg,
745 Reg.FI.getValue());
746 }
747
748 if (ScratchExecCopy != AMDGPU::NoRegister) {
749 // FIXME: Split block and make terminator.
750 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
751 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
752 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
753 .addReg(ScratchExecCopy, RegState::Kill);
754 LiveRegs.addReg(ScratchExecCopy);
755 }
756
757
758 if (FuncInfo->FramePointerSaveIndex) {
759 const int FI = FuncInfo->FramePointerSaveIndex.getValue();
760 assert(!MFI.isDeadObjectIndex(FI) &&((!MFI.isDeadObjectIndex(FI) && MFI.getStackID(FI) ==
TargetStackID::SGPRSpill) ? static_cast<void> (0) : __assert_fail
("!MFI.isDeadObjectIndex(FI) && MFI.getStackID(FI) == TargetStackID::SGPRSpill"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 761, __PRETTY_FUNCTION__))
761 MFI.getStackID(FI) == TargetStackID::SGPRSpill)((!MFI.isDeadObjectIndex(FI) && MFI.getStackID(FI) ==
TargetStackID::SGPRSpill) ? static_cast<void> (0) : __assert_fail
("!MFI.isDeadObjectIndex(FI) && MFI.getStackID(FI) == TargetStackID::SGPRSpill"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 761, __PRETTY_FUNCTION__))
;
762 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
763 = FuncInfo->getSGPRToVGPRSpills(FI);
764 assert(Spill.size() == 1)((Spill.size() == 1) ? static_cast<void> (0) : __assert_fail
("Spill.size() == 1", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 764, __PRETTY_FUNCTION__))
;
765
766 // Save FP before setting it up.
767 // FIXME: This should respect spillSGPRToVGPR;
768 BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
769 Spill[0].VGPR)
770 .addReg(FramePtrReg)
771 .addImm(Spill[0].Lane)
772 .addReg(Spill[0].VGPR, RegState::Undef);
773 }
774
775 if (TRI.needsStackRealignment(MF)) {
776 HasFP = true;
777 const unsigned Alignment = MFI.getMaxAlignment();
778
779 RoundedSize += Alignment;
780 if (LiveRegs.empty()) {
781 LiveRegs.init(TRI);
782 LiveRegs.addLiveIns(MBB);
783 LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
784 }
785
786 unsigned ScratchSPReg = findScratchNonCalleeSaveRegister(
787 MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
788 assert(ScratchSPReg != AMDGPU::NoRegister &&((ScratchSPReg != AMDGPU::NoRegister && ScratchSPReg !=
FuncInfo->SGPRForFPSaveRestoreCopy) ? static_cast<void
> (0) : __assert_fail ("ScratchSPReg != AMDGPU::NoRegister && ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 789, __PRETTY_FUNCTION__))
789 ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy)((ScratchSPReg != AMDGPU::NoRegister && ScratchSPReg !=
FuncInfo->SGPRForFPSaveRestoreCopy) ? static_cast<void
> (0) : __assert_fail ("ScratchSPReg != AMDGPU::NoRegister && ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 789, __PRETTY_FUNCTION__))
;
790
791 // s_add_u32 tmp_reg, s32, NumBytes
792 // s_and_b32 s32, tmp_reg, 0b111...0000
793 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
794 .addReg(StackPtrReg)
795 .addImm((Alignment - 1) * ST.getWavefrontSize())
796 .setMIFlag(MachineInstr::FrameSetup);
797 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
798 .addReg(ScratchSPReg, RegState::Kill)
799 .addImm(-Alignment * ST.getWavefrontSize())
800 .setMIFlag(MachineInstr::FrameSetup);
801 FuncInfo->setIsStackRealigned(true);
802 } else if ((HasFP = hasFP(MF))) {
803 // If we need a base pointer, set it up here. It's whatever the value of
804 // the stack pointer is at this point. Any variable size objects will be
805 // allocated after this, so we can still use the base pointer to reference
806 // locals.
807 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
808 .addReg(StackPtrReg)
809 .setMIFlag(MachineInstr::FrameSetup);
810 }
811
812 if (HasFP && RoundedSize != 0) {
813 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
814 .addReg(StackPtrReg)
815 .addImm(RoundedSize * ST.getWavefrontSize())
816 .setMIFlag(MachineInstr::FrameSetup);
817 }
818
819 assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister ||(((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU
::NoRegister || FuncInfo->FramePointerSaveIndex)) &&
"Needed to save FP but didn't save it anywhere") ? static_cast
<void> (0) : __assert_fail ("(!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister || FuncInfo->FramePointerSaveIndex)) && \"Needed to save FP but didn't save it anywhere\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 821, __PRETTY_FUNCTION__))
820 FuncInfo->FramePointerSaveIndex)) &&(((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU
::NoRegister || FuncInfo->FramePointerSaveIndex)) &&
"Needed to save FP but didn't save it anywhere") ? static_cast
<void> (0) : __assert_fail ("(!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister || FuncInfo->FramePointerSaveIndex)) && \"Needed to save FP but didn't save it anywhere\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 821, __PRETTY_FUNCTION__))
821 "Needed to save FP but didn't save it anywhere")(((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU
::NoRegister || FuncInfo->FramePointerSaveIndex)) &&
"Needed to save FP but didn't save it anywhere") ? static_cast
<void> (0) : __assert_fail ("(!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister || FuncInfo->FramePointerSaveIndex)) && \"Needed to save FP but didn't save it anywhere\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 821, __PRETTY_FUNCTION__))
;
822
823 assert((HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::NoRegister &&(((HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::
NoRegister && !FuncInfo->FramePointerSaveIndex)) &&
"Saved FP but didn't need it") ? static_cast<void> (0)
: __assert_fail ("(HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::NoRegister && !FuncInfo->FramePointerSaveIndex)) && \"Saved FP but didn't need it\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 825, __PRETTY_FUNCTION__))
824 !FuncInfo->FramePointerSaveIndex)) &&(((HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::
NoRegister && !FuncInfo->FramePointerSaveIndex)) &&
"Saved FP but didn't need it") ? static_cast<void> (0)
: __assert_fail ("(HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::NoRegister && !FuncInfo->FramePointerSaveIndex)) && \"Saved FP but didn't need it\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 825, __PRETTY_FUNCTION__))
825 "Saved FP but didn't need it")(((HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::
NoRegister && !FuncInfo->FramePointerSaveIndex)) &&
"Saved FP but didn't need it") ? static_cast<void> (0)
: __assert_fail ("(HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::NoRegister && !FuncInfo->FramePointerSaveIndex)) && \"Saved FP but didn't need it\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 825, __PRETTY_FUNCTION__))
;
826}
827
828void SIFrameLowering::emitEpilogue(MachineFunction &MF,
829 MachineBasicBlock &MBB) const {
830 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
831 if (FuncInfo->isEntryFunction())
832 return;
833
834 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
835 const SIInstrInfo *TII = ST.getInstrInfo();
836 MachineRegisterInfo &MRI = MF.getRegInfo();
837 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
838 LivePhysRegs LiveRegs;
839 DebugLoc DL;
840
841 const MachineFrameInfo &MFI = MF.getFrameInfo();
842 uint32_t NumBytes = MFI.getStackSize();
843 uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
844 NumBytes + MFI.getMaxAlignment() : NumBytes;
845
846 if (RoundedSize != 0 && hasFP(MF)) {
847 const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
848 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
849 .addReg(StackPtrReg)
850 .addImm(RoundedSize * ST.getWavefrontSize())
851 .setMIFlag(MachineInstr::FrameDestroy);
852 }
853
854 if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
855 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->getFrameOffsetReg())
856 .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
857 .setMIFlag(MachineInstr::FrameSetup);
858 }
859
860 if (FuncInfo->FramePointerSaveIndex) {
861 const int FI = FuncInfo->FramePointerSaveIndex.getValue();
862
863 assert(!MF.getFrameInfo().isDeadObjectIndex(FI) &&((!MF.getFrameInfo().isDeadObjectIndex(FI) && MF.getFrameInfo
().getStackID(FI) == TargetStackID::SGPRSpill) ? static_cast<
void> (0) : __assert_fail ("!MF.getFrameInfo().isDeadObjectIndex(FI) && MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 864, __PRETTY_FUNCTION__))
864 MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill)((!MF.getFrameInfo().isDeadObjectIndex(FI) && MF.getFrameInfo
().getStackID(FI) == TargetStackID::SGPRSpill) ? static_cast<
void> (0) : __assert_fail ("!MF.getFrameInfo().isDeadObjectIndex(FI) && MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 864, __PRETTY_FUNCTION__))
;
865
866 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
867 = FuncInfo->getSGPRToVGPRSpills(FI);
868 assert(Spill.size() == 1)((Spill.size() == 1) ? static_cast<void> (0) : __assert_fail
("Spill.size() == 1", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 868, __PRETTY_FUNCTION__))
;
869 BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
870 FuncInfo->getFrameOffsetReg())
871 .addReg(Spill[0].VGPR)
872 .addImm(Spill[0].Lane);
873 }
874
875 unsigned ScratchExecCopy = AMDGPU::NoRegister;
876 for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
877 : FuncInfo->getSGPRSpillVGPRs()) {
878 if (!Reg.FI.hasValue())
879 continue;
880
881 const SIRegisterInfo &TRI = TII->getRegisterInfo();
882 if (ScratchExecCopy == AMDGPU::NoRegister) {
883 // See emitPrologue
884 if (LiveRegs.empty()) {
885 LiveRegs.init(*ST.getRegisterInfo());
886 LiveRegs.addLiveOuts(MBB);
887 LiveRegs.stepBackward(*MBBI);
888 }
889
890 ScratchExecCopy = findScratchNonCalleeSaveRegister(
891 MRI, LiveRegs, *TRI.getWaveMaskRegClass());
892 LiveRegs.removeReg(ScratchExecCopy);
893
894 const unsigned OrSaveExec =
895 ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
896
897 BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
898 .addImm(-1);
899 }
900
901 buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
902 FuncInfo->getScratchRSrcReg(),
903 FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
904 }
905
906 if (ScratchExecCopy != AMDGPU::NoRegister) {
907 // FIXME: Split block and make terminator.
908 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
909 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
910 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
911 .addReg(ScratchExecCopy, RegState::Kill);
912 }
913}
914
915// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
916// memory. They should have been removed by now.
917static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
918 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
919 I != E; ++I) {
920 if (!MFI.isDeadObjectIndex(I))
921 return false;
922 }
923
924 return true;
925}
926
927#ifndef NDEBUG
928static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
929 Optional<int> FramePointerSaveIndex) {
930 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
931 I != E; ++I) {
932 if (!MFI.isDeadObjectIndex(I) &&
933 MFI.getStackID(I) == TargetStackID::SGPRSpill &&
934 FramePointerSaveIndex && I != FramePointerSaveIndex) {
935 return false;
936 }
937 }
938
939 return true;
940}
941#endif
942
943int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
944 unsigned &FrameReg) const {
945 const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
946
947 FrameReg = RI->getFrameRegister(MF);
948 return MF.getFrameInfo().getObjectOffset(FI);
949}
950
951void SIFrameLowering::processFunctionBeforeFrameFinalized(
952 MachineFunction &MF,
953 RegScavenger *RS) const {
954 MachineFrameInfo &MFI = MF.getFrameInfo();
955
956 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
957 const SIRegisterInfo *TRI = ST.getRegisterInfo();
958 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
959
960 FuncInfo->removeDeadFrameIndices(MFI);
961 assert(allSGPRSpillsAreDead(MFI, None) &&((allSGPRSpillsAreDead(MFI, None) && "SGPR spill should have been removed in SILowerSGPRSpills"
) ? static_cast<void> (0) : __assert_fail ("allSGPRSpillsAreDead(MFI, None) && \"SGPR spill should have been removed in SILowerSGPRSpills\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 962, __PRETTY_FUNCTION__))
962 "SGPR spill should have been removed in SILowerSGPRSpills")((allSGPRSpillsAreDead(MFI, None) && "SGPR spill should have been removed in SILowerSGPRSpills"
) ? static_cast<void> (0) : __assert_fail ("allSGPRSpillsAreDead(MFI, None) && \"SGPR spill should have been removed in SILowerSGPRSpills\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 962, __PRETTY_FUNCTION__))
;
963
964 // FIXME: The other checks should be redundant with allStackObjectsAreDead,
965 // but currently hasNonSpillStackObjects is set only from source
966 // allocas. Stack temps produced from legalization are not counted currently.
967 if (!allStackObjectsAreDead(MFI)) {
968 assert(RS && "RegScavenger required if spilling")((RS && "RegScavenger required if spilling") ? static_cast
<void> (0) : __assert_fail ("RS && \"RegScavenger required if spilling\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 968, __PRETTY_FUNCTION__))
;
969
970 if (FuncInfo->isEntryFunction()) {
971 int ScavengeFI = MFI.CreateFixedObject(
972 TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
973 RS->addScavengingFrameIndex(ScavengeFI);
974 } else {
975 int ScavengeFI = MFI.CreateStackObject(
976 TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
977 TRI->getSpillAlignment(AMDGPU::SGPR_32RegClass),
978 false);
979 RS->addScavengingFrameIndex(ScavengeFI);
980 }
981 }
982}
983
984// Only report VGPRs to generic code.
985void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
986 BitVector &SavedVGPRs,
987 RegScavenger *RS) const {
988 TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
989 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
990 if (MFI->isEntryFunction())
991 return;
992
993 const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
994 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
995 const SIRegisterInfo *TRI = ST.getRegisterInfo();
996
997 // Ignore the SGPRs the default implementation found.
998 SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask());
999
1000 // hasFP only knows about stack objects that already exist. We're now
1001 // determining the stack slots that will be created, so we have to predict
1002 // them. Stack objects force FP usage with calls.
1003 //
1004 // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1005 // don't want to report it here.
1006 //
1007 // FIXME: Is this really hasReservedCallFrame?
1008 const bool WillHaveFP =
1009 FrameInfo.hasCalls() &&
1010 (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1011
1012 // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
1013 // so don't allow the default insertion to handle them.
1014 for (auto SSpill : MFI->getSGPRSpillVGPRs())
1015 SavedVGPRs.reset(SSpill.VGPR);
1016
1017 const bool HasFP = WillHaveFP || hasFP(MF);
1018 if (!HasFP)
1019 return;
1020
1021 if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
1022 int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
1023 TargetStackID::SGPRSpill);
1024
1025 // If there is already a VGPR with free lanes, use it. We may already have
1026 // to pay the penalty for spilling a CSR VGPR.
1027 if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
1028 llvm_unreachable("allocate SGPR spill should have worked")::llvm::llvm_unreachable_internal("allocate SGPR spill should have worked"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 1028)
;
1029
1030 MFI->FramePointerSaveIndex = NewFI;
1031
1032 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("frame-info")) { auto Spill = MFI->getSGPRToVGPRSpills(NewFI
).front(); dbgs() << "Spilling FP to " << printReg
(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n'
; } } while (false)
1033 auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("frame-info")) { auto Spill = MFI->getSGPRToVGPRSpills(NewFI
).front(); dbgs() << "Spilling FP to " << printReg
(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n'
; } } while (false)
1034 dbgs() << "Spilling FP to " << printReg(Spill.VGPR, TRI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("frame-info")) { auto Spill = MFI->getSGPRToVGPRSpills(NewFI
).front(); dbgs() << "Spilling FP to " << printReg
(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n'
; } } while (false)
1035 << ':' << Spill.Lane << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("frame-info")) { auto Spill = MFI->getSGPRToVGPRSpills(NewFI
).front(); dbgs() << "Spilling FP to " << printReg
(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n'
; } } while (false)
;
1036 return;
1037 }
1038
1039 MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo());
1040
1041 if (!MFI->SGPRForFPSaveRestoreCopy) {
1042 // There's no free lane to spill, and no free register to save FP, so we're
1043 // forced to spill another VGPR to use for the spill.
1044 int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
1045 TargetStackID::SGPRSpill);
1046 if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
1047 llvm_unreachable("allocate SGPR spill should have worked")::llvm::llvm_unreachable_internal("allocate SGPR spill should have worked"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 1047)
;
1048 MFI->FramePointerSaveIndex = NewFI;
1049
1050 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("frame-info")) { auto Spill = MFI->getSGPRToVGPRSpills(NewFI
).front(); dbgs() << "FP requires fallback spill to " <<
printReg(Spill.VGPR, TRI) << ':' << Spill.Lane <<
'\n';; } } while (false)
1051 auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("frame-info")) { auto Spill = MFI->getSGPRToVGPRSpills(NewFI
).front(); dbgs() << "FP requires fallback spill to " <<
printReg(Spill.VGPR, TRI) << ':' << Spill.Lane <<
'\n';; } } while (false)
1052 dbgs() << "FP requires fallback spill to " << printReg(Spill.VGPR, TRI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("frame-info")) { auto Spill = MFI->getSGPRToVGPRSpills(NewFI
).front(); dbgs() << "FP requires fallback spill to " <<
printReg(Spill.VGPR, TRI) << ':' << Spill.Lane <<
'\n';; } } while (false)
1053 << ':' << Spill.Lane << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("frame-info")) { auto Spill = MFI->getSGPRToVGPRSpills(NewFI
).front(); dbgs() << "FP requires fallback spill to " <<
printReg(Spill.VGPR, TRI) << ':' << Spill.Lane <<
'\n';; } } while (false)
;
1054 } else {
1055 LLVM_DEBUG(dbgs() << "Saving FP with copy to " <<do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("frame-info")) { dbgs() << "Saving FP with copy to " <<
printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n'
; } } while (false)
1056 printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("frame-info")) { dbgs() << "Saving FP with copy to " <<
printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n'
; } } while (false)
;
1057 }
1058}
1059
1060void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
1061 BitVector &SavedRegs,
1062 RegScavenger *RS) const {
1063 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1064 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1065 if (MFI->isEntryFunction())
1066 return;
1067
1068 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1069 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1070
1071 // The SP is specifically managed and we don't want extra spills of it.
1072 SavedRegs.reset(MFI->getStackPtrOffsetReg());
1073 SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
1074}
1075
1076bool SIFrameLowering::assignCalleeSavedSpillSlots(
1077 MachineFunction &MF, const TargetRegisterInfo *TRI,
1078 std::vector<CalleeSavedInfo> &CSI) const {
1079 if (CSI.empty())
1080 return true; // Early exit if no callee saved registers are modified!
1081
1082 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1083 if (!FuncInfo->SGPRForFPSaveRestoreCopy)
1084 return false;
1085
1086 for (auto &CS : CSI) {
1087 if (CS.getReg() == FuncInfo->getFrameOffsetReg()) {
1088 if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister)
1089 CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
1090 break;
1091 }
1092 }
1093
1094 return false;
1095}
1096
1097MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
1098 MachineFunction &MF,
1099 MachineBasicBlock &MBB,
1100 MachineBasicBlock::iterator I) const {
1101 int64_t Amount = I->getOperand(0).getImm();
1102 if (Amount == 0)
1103 return MBB.erase(I);
1104
1105 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1106 const SIInstrInfo *TII = ST.getInstrInfo();
1107 const DebugLoc &DL = I->getDebugLoc();
1108 unsigned Opc = I->getOpcode();
1109 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
1110 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
1111
1112 if (!hasReservedCallFrame(MF)) {
1113 unsigned Align = getStackAlignment();
1114
1115 Amount = alignTo(Amount, Align);
1116 assert(isUInt<32>(Amount) && "exceeded stack address space size")((isUInt<32>(Amount) && "exceeded stack address space size"
) ? static_cast<void> (0) : __assert_fail ("isUInt<32>(Amount) && \"exceeded stack address space size\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 1116, __PRETTY_FUNCTION__))
;
1117 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1118 unsigned SPReg = MFI->getStackPtrOffsetReg();
1119
1120 unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
1121 BuildMI(MBB, I, DL, TII->get(Op), SPReg)
1122 .addReg(SPReg)
1123 .addImm(Amount * ST.getWavefrontSize());
1124 } else if (CalleePopAmount != 0) {
1125 llvm_unreachable("is this used?")::llvm::llvm_unreachable_internal("is this used?", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp"
, 1125)
;
1126 }
1127
1128 return MBB.erase(I);
1129}
1130
1131bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
1132 const MachineFrameInfo &MFI = MF.getFrameInfo();
1133 if (MFI.hasCalls()) {
1134 // All offsets are unsigned, so need to be addressed in the same direction
1135 // as stack growth.
1136
1137 // FIXME: This function is pretty broken, since it can be called before the
1138 // frame layout is determined or CSR spills are inserted.
1139 if (MFI.getStackSize() != 0)
1140 return true;
1141
1142 // For the entry point, the input wave scratch offset must be copied to the
1143 // API SP if there are calls.
1144 if (MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction())
1145 return true;
1146 }
1147
1148 return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
1149 MFI.hasStackMap() || MFI.hasPatchPoint() ||
1150 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
1151 MF.getTarget().Options.DisableFramePointerElim(MF);
1152}