File: | llvm/lib/Target/AMDGPU/SIFrameLowering.cpp |
Warning: | line 372, column 9: Value stored to 'HandledScratchWaveOffsetReg' is never read |
1 | //===----------------------- SIFrameLowering.cpp --------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //==-----------------------------------------------------------------------===// |
8 | |
9 | #include "SIFrameLowering.h" |
10 | #include "AMDGPUSubtarget.h" |
11 | #include "SIInstrInfo.h" |
12 | #include "SIMachineFunctionInfo.h" |
13 | #include "SIRegisterInfo.h" |
14 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
15 | |
16 | #include "llvm/CodeGen/LivePhysRegs.h" |
17 | #include "llvm/CodeGen/MachineFrameInfo.h" |
18 | #include "llvm/CodeGen/MachineFunction.h" |
19 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
20 | #include "llvm/CodeGen/RegisterScavenging.h" |
21 | |
22 | using namespace llvm; |
23 | |
24 | #define DEBUG_TYPE "frame-info" |
25 | |
26 | |
27 | static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST, |
28 | const MachineFunction &MF) { |
29 | return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(), |
30 | ST.getMaxNumSGPRs(MF) / 4); |
31 | } |
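| // Each SGPR_128 register covers four consecutive SGPR_32 registers, so the |
| // tuple count is the 32-bit SGPR budget divided by 4; e.g. a 104-SGPR budget |
| // yields the first 26 SGPR_128 tuples. |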
32 | |
33 | static ArrayRef<MCPhysReg> getAllSGPRs(const GCNSubtarget &ST, |
34 | const MachineFunction &MF) { |
35 | return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(), |
36 | ST.getMaxNumSGPRs(MF)); |
37 | } |
38 | |
39 | // Find a scratch register that we can use at the start of the prologue to |
40 | // re-align the stack pointer. We avoid using callee-save registers since they |
41 | // may appear to be free when this is called from canUseAsPrologue (during |
42 | // shrink wrapping), but then no longer be free when this is called from |
43 | // emitPrologue. |
44 | // |
45 | // FIXME: This is a bit conservative, since in the above case we could use one |
46 | // of the callee-save registers as a scratch temp to re-align the stack pointer, |
47 | // but we would then have to make sure that we were in fact saving at least one |
48 | // callee-save register in the prologue, which is additional complexity that |
49 | // doesn't seem worth the benefit. |
50 | static unsigned findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, |
51 | LivePhysRegs &LiveRegs, |
52 | const TargetRegisterClass &RC, |
53 | bool Unused = false) { |
54 | // Mark callee saved registers as used so we will not choose them. |
55 | const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); |
56 | for (unsigned i = 0; CSRegs[i]; ++i) |
57 | LiveRegs.addReg(CSRegs[i]); |
58 | |
59 | if (Unused) { |
60 | // We are looking for a register that can be used throughout the entire |
61 | // function, so any use is unacceptable. |
62 | for (unsigned Reg : RC) { |
63 | if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg)) |
64 | return Reg; |
65 | } |
66 | } else { |
67 | for (unsigned Reg : RC) { |
68 | if (LiveRegs.available(MRI, Reg)) |
69 | return Reg; |
70 | } |
71 | } |
72 | |
73 | // When an unused register is required, this is called from contexts where |
74 | // failure is an option and the caller has a fallback plan. In other |
75 | // contexts, this must succeed. |
76 | if (!Unused) |
77 | report_fatal_error("failed to find free scratch register"); |
78 | |
79 | return AMDGPU::NoRegister; |
80 | } |
81 | |
82 | static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI) { |
83 | LivePhysRegs LiveRegs; |
84 | LiveRegs.init(*MRI.getTargetRegisterInfo()); |
85 | return findScratchNonCalleeSaveRegister( |
86 | MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true); |
87 | } |
88 | |
89 | // We need to emit stack operations specially here because the prologue uses a |
90 | // different frame register than the rest of the function (the one that |
91 | // getFrameRegister would return). |
92 | static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, |
93 | MachineBasicBlock::iterator I, |
94 | const SIInstrInfo *TII, unsigned SpillReg, |
95 | unsigned ScratchRsrcReg, unsigned SPReg, int FI) { |
96 | MachineFunction *MF = MBB.getParent(); |
97 | MachineFrameInfo &MFI = MF->getFrameInfo(); |
98 | |
99 | int64_t Offset = MFI.getObjectOffset(FI); |
100 | |
101 | MachineMemOperand *MMO = MF->getMachineMemOperand( |
102 | MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4, |
103 | MFI.getObjectAlignment(FI)); |
104 | |
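| // The MUBUF immediate offset field is only 12 bits, so offsets that fit use |
| // the _OFFSET form directly; anything larger must be materialized in a VGPR |
| // and use the _OFFEN form below. |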
105 | if (isUInt<12>(Offset)) { |
106 | BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET)) |
107 | .addReg(SpillReg, RegState::Kill) |
108 | .addReg(ScratchRsrcReg) |
109 | .addReg(SPReg) |
110 | .addImm(Offset) |
111 | .addImm(0) // glc |
112 | .addImm(0) // slc |
113 | .addImm(0) // tfe |
114 | .addImm(0) // dlc |
115 | .addImm(0) // swz |
116 | .addMemOperand(MMO); |
117 | return; |
118 | } |
119 | |
120 | MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister( |
121 | MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass); |
122 | |
123 | BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg) |
124 | .addImm(Offset); |
125 | |
126 | BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN)) |
127 | .addReg(SpillReg, RegState::Kill) |
128 | .addReg(OffsetReg, RegState::Kill) |
129 | .addReg(ScratchRsrcReg) |
130 | .addReg(SPReg) |
131 | .addImm(0) |
132 | .addImm(0) // glc |
133 | .addImm(0) // slc |
134 | .addImm(0) // tfe |
135 | .addImm(0) // dlc |
136 | .addImm(0) // swz |
137 | .addMemOperand(MMO); |
138 | } |
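| // A minimal usage sketch, mirroring the call emitPrologue makes for each |
| // SGPR-spill VGPR (Reg is an SGPRSpillVGPRCSR entry): |
| //   buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR, |
| //                    FuncInfo->getScratchRSrcReg(), StackPtrReg, |
| //                    Reg.FI.getValue()); |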
139 | |
140 | static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, |
141 | MachineBasicBlock::iterator I, |
142 | const SIInstrInfo *TII, unsigned SpillReg, |
143 | unsigned ScratchRsrcReg, unsigned SPReg, int FI) { |
144 | MachineFunction *MF = MBB.getParent(); |
145 | MachineFrameInfo &MFI = MF->getFrameInfo(); |
146 | int64_t Offset = MFI.getObjectOffset(FI); |
147 | |
148 | MachineMemOperand *MMO = MF->getMachineMemOperand( |
149 | MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4, |
150 | MFI.getObjectAlignment(FI)); |
151 | |
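| // Mirror of buildPrologSpill: the same 12-bit MUBUF immediate limit decides |
| // between the _OFFSET and _OFFEN load forms. |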
152 | if (isUInt<12>(Offset)) { |
153 | BuildMI(MBB, I, DebugLoc(), |
154 | TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg) |
155 | .addReg(ScratchRsrcReg) |
156 | .addReg(SPReg) |
157 | .addImm(Offset) |
158 | .addImm(0) // glc |
159 | .addImm(0) // slc |
160 | .addImm(0) // tfe |
161 | .addImm(0) // dlc |
162 | .addImm(0) // swz |
163 | .addMemOperand(MMO); |
164 | return; |
165 | } |
166 | |
167 | MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister( |
168 | MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass); |
169 | |
170 | BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg) |
171 | .addImm(Offset); |
172 | |
173 | BuildMI(MBB, I, DebugLoc(), |
174 | TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg) |
175 | .addReg(OffsetReg, RegState::Kill) |
176 | .addReg(ScratchRsrcReg) |
177 | .addReg(SPReg) |
178 | .addImm(0) |
179 | .addImm(0) // glc |
180 | .addImm(0) // slc |
181 | .addImm(0) // tfe |
182 | .addImm(0) // dlc |
183 | .addImm(0) // swz |
184 | .addMemOperand(MMO); |
185 | } |
186 | |
187 | void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST, |
188 | MachineFunction &MF, |
189 | MachineBasicBlock &MBB) const { |
190 | const SIInstrInfo *TII = ST.getInstrInfo(); |
191 | const SIRegisterInfo* TRI = &TII->getRegisterInfo(); |
192 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
193 | |
194 | // We don't need this if we only have spills, since there is no user-facing |
195 | // scratch. |
196 | |
197 | // TODO: If we know we don't have flat instructions earlier, we can omit |
198 | // this from the input registers. |
199 | // |
200 | // TODO: We only need to know if we access scratch space through a flat |
201 | // pointer. Because we only detect if flat instructions are used at all, |
202 | // this will be used more often than necessary on VI. |
203 | |
204 | // Debug location must be unknown since the first debug location is used to |
205 | // determine the end of the prologue. |
206 | DebugLoc DL; |
207 | MachineBasicBlock::iterator I = MBB.begin(); |
208 | |
209 | Register FlatScratchInitReg = |
210 | MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT); |
211 | |
212 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
213 | MRI.addLiveIn(FlatScratchInitReg); |
214 | MBB.addLiveIn(FlatScratchInitReg); |
215 | |
216 | Register FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0); |
217 | Register FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1); |
218 | |
219 | unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); |
220 | |
221 | // Do a 64-bit pointer add. |
222 | if (ST.flatScratchIsPointer()) { |
223 | if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { |
224 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) |
225 | .addReg(FlatScrInitLo) |
226 | .addReg(ScratchWaveOffsetReg); |
227 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi) |
228 | .addReg(FlatScrInitHi) |
229 | .addImm(0); |
230 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). |
231 | addReg(FlatScrInitLo). |
232 | addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO | |
233 | (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); |
234 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). |
235 | addReg(FlatScrInitHi). |
236 | addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI | |
237 | (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); |
238 | return; |
239 | } |
240 | |
241 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO) |
242 | .addReg(FlatScrInitLo) |
243 | .addReg(ScratchWaveOffsetReg); |
244 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI) |
245 | .addReg(FlatScrInitHi) |
246 | .addImm(0); |
247 | |
248 | return; |
249 | } |
250 | |
251 | assert(ST.getGeneration() < AMDGPUSubtarget::GFX10); |
252 | |
253 | // Copy the size in bytes. |
254 | BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO) |
255 | .addReg(FlatScrInitHi, RegState::Kill); |
256 | |
257 | // Add wave offset in bytes to private base offset. |
258 | // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init. |
259 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) |
260 | .addReg(FlatScrInitLo) |
261 | .addReg(ScratchWaveOffsetReg); |
262 | |
263 | // Convert offset to 256-byte units. |
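| // (FLAT_SCR_HI expects the offset in 256-byte granules, so a byte offset of |
| // e.g. 0x1000 becomes 0x10 after the shift below.) |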
264 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI) |
265 | .addReg(FlatScrInitLo, RegState::Kill) |
266 | .addImm(8); |
267 | } |
268 | |
269 | unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg( |
270 | const GCNSubtarget &ST, |
271 | const SIInstrInfo *TII, |
272 | const SIRegisterInfo *TRI, |
273 | SIMachineFunctionInfo *MFI, |
274 | MachineFunction &MF) const { |
275 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
276 | |
277 | // We need to insert initialization of the scratch resource descriptor. |
278 | unsigned ScratchRsrcReg = MFI->getScratchRSrcReg(); |
279 | if (ScratchRsrcReg == AMDGPU::NoRegister || |
280 | !MRI.isPhysRegUsed(ScratchRsrcReg)) |
281 | return AMDGPU::NoRegister; |
282 | |
283 | if (ST.hasSGPRInitBug() || |
284 | ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF)) |
285 | return ScratchRsrcReg; |
286 | |
287 | // We reserved the last registers for this. Shift it down to the end of those |
288 | // which were actually used. |
289 | // |
290 | // FIXME: It might be safer to use a pseudoregister before replacement. |
291 | |
292 | // FIXME: We should be able to eliminate unused input registers. We only |
293 | // cannot do this for the resources required for scratch access. For now we |
294 | // skip over user SGPRs and may leave unused holes. |
295 | |
296 | // We find the resource first because it has an alignment requirement. |
297 | |
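| // Round the preloaded SGPR count up to SGPR_128 granularity (4 SGPRs per |
| // tuple); e.g. 10 preloaded SGPRs occupy the first three SGPR_128 tuples. |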
298 | unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4; |
299 | ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF); |
300 | AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded)); |
301 | |
302 | // Skip the last N reserved elements because they should have already been |
303 | // reserved for VCC etc. |
304 | for (MCPhysReg Reg : AllSGPR128s) { |
305 | // Pick the first unallocated one. Make sure we don't clobber the other |
306 | // reserved input we needed. |
307 | if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) { |
308 | MRI.replaceRegWith(ScratchRsrcReg, Reg); |
309 | MFI->setScratchRSrcReg(Reg); |
310 | return Reg; |
311 | } |
312 | } |
313 | |
314 | return ScratchRsrcReg; |
315 | } |
316 | |
317 | // Shift down registers reserved for the scratch wave offset. |
318 | std::pair<unsigned, bool> |
319 | SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( |
320 | const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, |
321 | SIMachineFunctionInfo *MFI, MachineFunction &MF) const { |
322 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
323 | unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); |
324 | |
325 | assert(MFI->isEntryFunction()); |
326 | |
327 | // No replacement necessary. |
328 | if (ScratchWaveOffsetReg == AMDGPU::NoRegister || |
329 | (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) { |
330 | return std::make_pair(AMDGPU::NoRegister, false); |
331 | } |
332 | |
333 | if (ST.hasSGPRInitBug()) |
334 | return std::make_pair(ScratchWaveOffsetReg, false); |
335 | |
336 | unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); |
337 | |
338 | ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF); |
339 | if (NumPreloaded > AllSGPRs.size()) |
340 | return std::make_pair(ScratchWaveOffsetReg, false); |
341 | |
342 | AllSGPRs = AllSGPRs.slice(NumPreloaded); |
343 | |
344 | // We need to drop registers from the end of the list that we cannot use |
345 | // for the scratch wave offset. |
346 | // + 2 s102 and s103 do not exist on VI. |
347 | // + 2 for vcc |
348 | // + 2 for xnack_mask |
349 | // + 2 for flat_scratch |
350 | // + 4 for registers reserved for scratch resource register |
351 | // + 1 for register reserved for scratch wave offset. (By excluding this |
352 | // register from the list to consider, it means that when this |
353 | // register is being used for the scratch wave offset and there |
354 | // are no other free SGPRs, then the value will stay in this register.) |
355 | // + 1 if stack pointer is used. |
356 | // ---- |
357 | // 13 (+1) |
358 | unsigned ReservedRegCount = 13; |
359 | |
360 | if (AllSGPRs.size() < ReservedRegCount) |
361 | return std::make_pair(ScratchWaveOffsetReg, false); |
362 | |
363 | bool HandledScratchWaveOffsetReg = |
364 | ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF); |
365 | bool FPAdjusted = false; |
366 | |
367 | for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) { |
368 | // Pick the first unallocated SGPR. Be careful not to pick an alias of the |
369 | // scratch descriptor, since we haven't added its uses yet. |
370 | if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) { |
371 | if (!HandledScratchWaveOffsetReg) { |
372 | HandledScratchWaveOffsetReg = true; |
Value stored to 'HandledScratchWaveOffsetReg' is never read | |
373 | |
374 | MRI.replaceRegWith(ScratchWaveOffsetReg, Reg); |
375 | if (MFI->getScratchWaveOffsetReg() == MFI->getStackPtrOffsetReg()) { |
376 | assert(!hasFP(MF)); |
377 | MFI->setStackPtrOffsetReg(Reg); |
378 | } |
379 | |
380 | MFI->setScratchWaveOffsetReg(Reg); |
381 | MFI->setFrameOffsetReg(Reg); |
382 | ScratchWaveOffsetReg = Reg; |
383 | FPAdjusted = true; |
384 | break; |
385 | } |
386 | } |
387 | } |
388 | |
389 | return std::make_pair(ScratchWaveOffsetReg, FPAdjusted); |
390 | } |
391 | |
392 | void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, |
393 | MachineBasicBlock &MBB) const { |
394 | assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); |
395 | |
396 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
397 | |
398 | // If we only have SGPR spills, we won't actually be using scratch memory |
399 | // since these spill to VGPRs. |
400 | // |
401 | // FIXME: We should be cleaning up these unused SGPR spill frame indices |
402 | // somewhere. |
403 | |
404 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
405 | const SIInstrInfo *TII = ST.getInstrInfo(); |
406 | const SIRegisterInfo *TRI = &TII->getRegisterInfo(); |
407 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
408 | const Function &F = MF.getFunction(); |
409 | |
410 | // We need to do the replacement of the private segment buffer and wave offset |
411 | // register even if there are no stack objects. There could be stores to undef |
412 | // or a constant without an associated object. |
413 | |
414 | // FIXME: We still have implicit uses on SGPR spill instructions in case they |
415 | // need to spill to vector memory. It's likely that this will not happen, but at |
416 | // this point it appears we need the setup. This part of the prolog should be |
417 | // emitted after frame indices are eliminated. |
418 | |
419 | if (MFI->hasFlatScratchInit()) |
420 | emitFlatScratchInit(ST, MF, MBB); |
421 | |
422 | unsigned ScratchRsrcReg |
423 | = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF); |
424 | |
425 | unsigned ScratchWaveOffsetReg; |
426 | bool FPAdjusted; |
427 | std::tie(ScratchWaveOffsetReg, FPAdjusted) = |
428 | getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF); |
429 | |
430 | // We need to insert initialization of the scratch resource descriptor. |
431 | Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( |
432 | AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); |
433 | |
434 | unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; |
435 | if (ST.isAmdHsaOrMesa(F)) { |
436 | PreloadedPrivateBufferReg = MFI->getPreloadedReg( |
437 | AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER); |
438 | } |
439 | |
440 | bool OffsetRegUsed = ScratchWaveOffsetReg != AMDGPU::NoRegister && |
441 | MRI.isPhysRegUsed(ScratchWaveOffsetReg); |
442 | bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister && |
443 | MRI.isPhysRegUsed(ScratchRsrcReg); |
444 | |
445 | // FIXME: Hack to not crash in situations which emitted an error. |
446 | if (PreloadedScratchWaveOffsetReg == AMDGPU::NoRegister) |
447 | return; |
448 | |
449 | // We added live-ins during argument lowering, but since they were not used |
450 | // they were deleted. We're adding the uses now, so add them back. |
451 | MRI.addLiveIn(PreloadedScratchWaveOffsetReg); |
452 | MBB.addLiveIn(PreloadedScratchWaveOffsetReg); |
453 | |
454 | if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) { |
455 | assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F)); |
456 | MRI.addLiveIn(PreloadedPrivateBufferReg); |
457 | MBB.addLiveIn(PreloadedPrivateBufferReg); |
458 | } |
459 | |
460 | // Make the selected register live throughout the function. |
461 | for (MachineBasicBlock &OtherBB : MF) { |
462 | if (&OtherBB == &MBB) |
463 | continue; |
464 | |
465 | if (OffsetRegUsed || FPAdjusted) |
466 | OtherBB.addLiveIn(ScratchWaveOffsetReg); |
467 | |
468 | if (ResourceRegUsed) |
469 | OtherBB.addLiveIn(ScratchRsrcReg); |
470 | } |
471 | |
472 | DebugLoc DL; |
473 | MachineBasicBlock::iterator I = MBB.begin(); |
474 | |
475 | // If we reserved the original input registers, we don't need to copy to the |
476 | // reserved registers. |
477 | |
478 | bool CopyBuffer = ResourceRegUsed && |
479 | PreloadedPrivateBufferReg != AMDGPU::NoRegister && |
480 | ST.isAmdHsaOrMesa(F) && |
481 | ScratchRsrcReg != PreloadedPrivateBufferReg; |
482 | |
483 | // This needs to be careful of the copying order to avoid overwriting one of |
484 | // the input registers before it's been copied to its final |
485 | // destination. Usually the offset should be copied first. |
486 | bool CopyBufferFirst = TRI->isSubRegisterEq(PreloadedPrivateBufferReg, |
487 | ScratchWaveOffsetReg); |
488 | if (CopyBuffer && CopyBufferFirst) { |
489 | BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg) |
490 | .addReg(PreloadedPrivateBufferReg, RegState::Kill); |
491 | } |
492 | |
493 | unsigned SPReg = MFI->getStackPtrOffsetReg(); |
494 | assert(SPReg != AMDGPU::SP_REG); |
495 | |
496 | // FIXME: Remove the isPhysRegUsed checks |
497 | const bool HasFP = hasFP(MF); |
498 | |
499 | if (HasFP || OffsetRegUsed) { |
500 | assert(ScratchWaveOffsetReg); |
501 | BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg) |
502 | .addReg(PreloadedScratchWaveOffsetReg, HasFP ? RegState::Kill : 0); |
503 | } |
504 | |
505 | if (CopyBuffer && !CopyBufferFirst) { |
506 | BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg) |
507 | .addReg(PreloadedPrivateBufferReg, RegState::Kill); |
508 | } |
509 | |
510 | if (ResourceRegUsed) { |
511 | emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I, |
512 | PreloadedPrivateBufferReg, ScratchRsrcReg); |
513 | } |
514 | |
515 | if (HasFP) { |
516 | DebugLoc DL; |
517 | const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); |
518 | int64_t StackSize = FrameInfo.getStackSize(); |
519 | |
520 | // On kernel entry, the private scratch wave offset is the SP value. |
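| // The SP holds a byte offset for the whole wave into swizzled scratch, so |
| // the per-lane stack size is scaled by the wavefront size below. |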
521 | if (StackSize == 0) { |
522 | BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SPReg) |
523 | .addReg(MFI->getScratchWaveOffsetReg()); |
524 | } else { |
525 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), SPReg) |
526 | .addReg(MFI->getScratchWaveOffsetReg()) |
527 | .addImm(StackSize * ST.getWavefrontSize()); |
528 | } |
529 | } |
530 | } |
531 | |
532 | // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set. |
533 | void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST, |
534 | MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI, |
535 | MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg, |
536 | unsigned ScratchRsrcReg) const { |
537 | |
538 | const SIInstrInfo *TII = ST.getInstrInfo(); |
539 | const SIRegisterInfo *TRI = &TII->getRegisterInfo(); |
540 | const Function &Fn = MF.getFunction(); |
541 | DebugLoc DL; |
542 | |
543 | if (ST.isAmdPalOS()) { |
544 | // The pointer to the GIT is formed from the offset passed in and either |
545 | // the amdgpu-git-ptr-high function attribute or the top part of the PC. |
546 | Register RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); |
547 | Register RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); |
548 | Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); |
549 | |
550 | const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); |
551 | |
552 | if (MFI->getGITPtrHigh() != 0xffffffff) { |
553 | BuildMI(MBB, I, DL, SMovB32, RsrcHi) |
554 | .addImm(MFI->getGITPtrHigh()) |
555 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
556 | } else { |
557 | const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64); |
558 | BuildMI(MBB, I, DL, GetPC64, Rsrc01); |
559 | } |
560 | auto GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in |
561 | if (ST.hasMergedShaders()) { |
562 | switch (MF.getFunction().getCallingConv()) { |
563 | case CallingConv::AMDGPU_HS: |
564 | case CallingConv::AMDGPU_GS: |
565 | // Low GIT address is passed in s8 rather than s0 for an LS+HS or |
566 | // ES+GS merged shader on gfx9+. |
567 | GitPtrLo = AMDGPU::SGPR8; |
568 | break; |
569 | default: |
570 | break; |
571 | } |
572 | } |
573 | MF.getRegInfo().addLiveIn(GitPtrLo); |
574 | MBB.addLiveIn(GitPtrLo); |
575 | BuildMI(MBB, I, DL, SMovB32, RsrcLo) |
576 | .addReg(GitPtrLo) |
577 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
578 | |
579 | // We now have the GIT ptr - now get the scratch descriptor from the entry |
580 | // at offset 0 (or offset 16 for a compute shader). |
581 | MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); |
582 | const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM); |
583 | auto MMO = MF.getMachineMemOperand(PtrInfo, |
584 | MachineMemOperand::MOLoad | |
585 | MachineMemOperand::MOInvariant | |
586 | MachineMemOperand::MODereferenceable, |
587 | 16, 4); |
588 | unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0; |
589 | const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>(); |
590 | unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); |
591 | BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg) |
592 | .addReg(Rsrc01) |
593 | .addImm(EncodedOffset) // offset |
594 | .addImm(0) // glc |
595 | .addImm(0) // dlc |
596 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine) |
597 | .addMemOperand(MMO); |
598 | return; |
599 | } |
600 | if (ST.isMesaGfxShader(Fn) |
601 | || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) { |
602 | assert(!ST.isAmdHsaOrMesa(Fn)); |
603 | const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); |
604 | |
605 | Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); |
606 | Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); |
607 | |
608 | // Use relocations to get the pointer, and set up the other bits manually. |
609 | uint64_t Rsrc23 = TII->getScratchRsrcWords23(); |
610 | |
611 | if (MFI->hasImplicitBufferPtr()) { |
612 | Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); |
613 | |
614 | if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) { |
615 | const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); |
616 | |
617 | BuildMI(MBB, I, DL, Mov64, Rsrc01) |
618 | .addReg(MFI->getImplicitBufferPtrUserSGPR()) |
619 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
620 | } else { |
621 | const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); |
622 | |
623 | MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); |
624 | auto MMO = MF.getMachineMemOperand(PtrInfo, |
625 | MachineMemOperand::MOLoad | |
626 | MachineMemOperand::MOInvariant | |
627 | MachineMemOperand::MODereferenceable, |
628 | 8, 4); |
629 | BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01) |
630 | .addReg(MFI->getImplicitBufferPtrUserSGPR()) |
631 | .addImm(0) // offset |
632 | .addImm(0) // glc |
633 | .addImm(0) // dlc |
634 | .addMemOperand(MMO) |
635 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
636 | |
637 | MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); |
638 | MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); |
639 | } |
640 | } else { |
641 | Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); |
642 | Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); |
643 | |
644 | BuildMI(MBB, I, DL, SMovB32, Rsrc0) |
645 | .addExternalSymbol("SCRATCH_RSRC_DWORD0") |
646 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
647 | |
648 | BuildMI(MBB, I, DL, SMovB32, Rsrc1) |
649 | .addExternalSymbol("SCRATCH_RSRC_DWORD1") |
650 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
651 | |
652 | } |
653 | |
654 | BuildMI(MBB, I, DL, SMovB32, Rsrc2) |
655 | .addImm(Rsrc23 & 0xffffffff) |
656 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
657 | |
658 | BuildMI(MBB, I, DL, SMovB32, Rsrc3) |
659 | .addImm(Rsrc23 >> 32) |
660 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
661 | } |
662 | } |
663 | |
664 | bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const { |
665 | switch (ID) { |
666 | case TargetStackID::Default: |
667 | case TargetStackID::NoAlloc: |
668 | case TargetStackID::SGPRSpill: |
669 | return true; |
670 | case TargetStackID::SVEVector: |
671 | return false; |
672 | } |
673 | llvm_unreachable("Invalid TargetStackID::Value")::llvm::llvm_unreachable_internal("Invalid TargetStackID::Value" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp" , 673); |
674 | } |
675 | |
676 | void SIFrameLowering::emitPrologue(MachineFunction &MF, |
677 | MachineBasicBlock &MBB) const { |
678 | SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); |
679 | if (FuncInfo->isEntryFunction()) { |
680 | emitEntryFunctionPrologue(MF, MBB); |
681 | return; |
682 | } |
683 | |
684 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
685 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
686 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
687 | const SIInstrInfo *TII = ST.getInstrInfo(); |
688 | const SIRegisterInfo &TRI = TII->getRegisterInfo(); |
689 | |
690 | unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg(); |
691 | unsigned FramePtrReg = FuncInfo->getFrameOffsetReg(); |
692 | LivePhysRegs LiveRegs; |
693 | |
694 | MachineBasicBlock::iterator MBBI = MBB.begin(); |
695 | DebugLoc DL; |
696 | |
697 | bool HasFP = false; |
698 | uint32_t NumBytes = MFI.getStackSize(); |
699 | uint32_t RoundedSize = NumBytes; |
700 | // To avoid clobbering VGPRs in lanes that weren't active on function entry, |
701 | // turn on all lanes before doing the spill to memory. |
702 | unsigned ScratchExecCopy = AMDGPU::NoRegister; |
703 | |
704 | // Emit the copy if we need an FP, and are using a free SGPR to save it. |
705 | if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) { |
706 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy) |
707 | .addReg(FramePtrReg) |
708 | .setMIFlag(MachineInstr::FrameSetup); |
709 | } |
710 | |
711 | for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg |
712 | : FuncInfo->getSGPRSpillVGPRs()) { |
713 | if (!Reg.FI.hasValue()) |
714 | continue; |
715 | |
716 | if (ScratchExecCopy == AMDGPU::NoRegister) { |
717 | if (LiveRegs.empty()) { |
718 | LiveRegs.init(TRI); |
719 | LiveRegs.addLiveIns(MBB); |
720 | if (FuncInfo->SGPRForFPSaveRestoreCopy) |
721 | LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy); |
722 | } |
723 | |
724 | ScratchExecCopy |
725 | = findScratchNonCalleeSaveRegister(MRI, LiveRegs, |
726 | *TRI.getWaveMaskRegClass()); |
727 | assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy); |
728 | |
729 | const unsigned OrSaveExec = ST.isWave32() ? |
730 | AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64; |
731 | BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), |
732 | ScratchExecCopy) |
733 | .addImm(-1); |
734 | } |
735 | |
736 | buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR, |
737 | FuncInfo->getScratchRSrcReg(), |
738 | StackPtrReg, |
739 | Reg.FI.getValue()); |
740 | } |
741 | |
742 | if (ScratchExecCopy != AMDGPU::NoRegister) { |
743 | // FIXME: Split block and make terminator. |
744 | unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; |
745 | unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
746 | BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) |
747 | .addReg(ScratchExecCopy, RegState::Kill); |
748 | LiveRegs.addReg(ScratchExecCopy); |
749 | } |
750 | |
751 | |
752 | if (FuncInfo->FramePointerSaveIndex) { |
753 | const int FI = FuncInfo->FramePointerSaveIndex.getValue(); |
754 | assert(!MFI.isDeadObjectIndex(FI) && |
755 | MFI.getStackID(FI) == TargetStackID::SGPRSpill); |
756 | ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill |
757 | = FuncInfo->getSGPRToVGPRSpills(FI); |
758 | assert(Spill.size() == 1); |
759 | |
760 | // Save FP before setting it up. |
761 | // FIXME: This should respect spillSGPRToVGPR. |
762 | BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), |
763 | Spill[0].VGPR) |
764 | .addReg(FramePtrReg) |
765 | .addImm(Spill[0].Lane) |
766 | .addReg(Spill[0].VGPR, RegState::Undef); |
767 | } |
768 | |
769 | if (TRI.needsStackRealignment(MF)) { |
770 | HasFP = true; |
771 | const unsigned Alignment = MFI.getMaxAlignment(); |
772 | |
773 | RoundedSize += Alignment; |
774 | if (LiveRegs.empty()) { |
775 | LiveRegs.init(TRI); |
776 | LiveRegs.addLiveIns(MBB); |
777 | LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy); |
778 | } |
779 | |
780 | unsigned ScratchSPReg = findScratchNonCalleeSaveRegister( |
781 | MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass); |
782 | assert(ScratchSPReg != AMDGPU::NoRegister && |
783 | ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy); |
784 | |
785 | // s_add_u32 tmp_reg, s32, (Alignment - 1) * WavefrontSize |
786 | // s_and_b32 fp_reg, tmp_reg, -(Alignment * WavefrontSize) |
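| // e.g. with MaxAlignment = 16 and a wave64 target, this adds |
| // (16 - 1) * 64 = 960 and then masks with -(16 * 64) = -1024. |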
787 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg) |
788 | .addReg(StackPtrReg) |
789 | .addImm((Alignment - 1) * ST.getWavefrontSize()) |
790 | .setMIFlag(MachineInstr::FrameSetup); |
791 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg) |
792 | .addReg(ScratchSPReg, RegState::Kill) |
793 | .addImm(-Alignment * ST.getWavefrontSize()) |
794 | .setMIFlag(MachineInstr::FrameSetup); |
795 | FuncInfo->setIsStackRealigned(true); |
796 | } else if ((HasFP = hasFP(MF))) { |
797 | // If we need a base pointer, set it up here. It's whatever the value of |
798 | // the stack pointer is at this point. Any variable size objects will be |
799 | // allocated after this, so we can still use the base pointer to reference |
800 | // locals. |
801 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) |
802 | .addReg(StackPtrReg) |
803 | .setMIFlag(MachineInstr::FrameSetup); |
804 | } |
805 | |
806 | if (HasFP && RoundedSize != 0) { |
807 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg) |
808 | .addReg(StackPtrReg) |
809 | .addImm(RoundedSize * ST.getWavefrontSize()) |
810 | .setMIFlag(MachineInstr::FrameSetup); |
811 | } |
812 | |
813 | assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister || |
814 | FuncInfo->FramePointerSaveIndex)) && |
815 | "Needed to save FP but didn't save it anywhere"); |
816 | |
817 | assert((HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::NoRegister && |
818 | !FuncInfo->FramePointerSaveIndex)) && |
819 | "Saved FP but didn't need it"); |
820 | } |
821 | |
822 | void SIFrameLowering::emitEpilogue(MachineFunction &MF, |
823 | MachineBasicBlock &MBB) const { |
824 | const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); |
825 | if (FuncInfo->isEntryFunction()) |
826 | return; |
827 | |
828 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
829 | const SIInstrInfo *TII = ST.getInstrInfo(); |
830 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
831 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
832 | LivePhysRegs LiveRegs; |
833 | DebugLoc DL; |
834 | |
835 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
836 | uint32_t NumBytes = MFI.getStackSize(); |
837 | uint32_t RoundedSize = FuncInfo->isStackRealigned() ? |
838 | NumBytes + MFI.getMaxAlignment() : NumBytes; |
839 | |
840 | if (RoundedSize != 0 && hasFP(MF)) { |
841 | const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg(); |
842 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg) |
843 | .addReg(StackPtrReg) |
844 | .addImm(RoundedSize * ST.getWavefrontSize()) |
845 | .setMIFlag(MachineInstr::FrameDestroy); |
846 | } |
847 | |
848 | if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) { |
849 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->getFrameOffsetReg()) |
850 | .addReg(FuncInfo->SGPRForFPSaveRestoreCopy) |
851 | .setMIFlag(MachineInstr::FrameDestroy); |
852 | } |
853 | |
854 | if (FuncInfo->FramePointerSaveIndex) { |
855 | const int FI = FuncInfo->FramePointerSaveIndex.getValue(); |
856 | |
857 | assert(!MF.getFrameInfo().isDeadObjectIndex(FI) && |
858 | MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill); |
859 | |
860 | ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill |
861 | = FuncInfo->getSGPRToVGPRSpills(FI); |
862 | assert(Spill.size() == 1); |
863 | BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), |
864 | FuncInfo->getFrameOffsetReg()) |
865 | .addReg(Spill[0].VGPR) |
866 | .addImm(Spill[0].Lane); |
867 | } |
868 | |
869 | unsigned ScratchExecCopy = AMDGPU::NoRegister; |
870 | for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg |
871 | : FuncInfo->getSGPRSpillVGPRs()) { |
872 | if (!Reg.FI.hasValue()) |
873 | continue; |
874 | |
875 | const SIRegisterInfo &TRI = TII->getRegisterInfo(); |
876 | if (ScratchExecCopy == AMDGPU::NoRegister) { |
877 | // See emitPrologue |
878 | if (LiveRegs.empty()) { |
879 | LiveRegs.init(*ST.getRegisterInfo()); |
880 | LiveRegs.addLiveOuts(MBB); |
881 | LiveRegs.stepBackward(*MBBI); |
882 | } |
883 | |
884 | ScratchExecCopy = findScratchNonCalleeSaveRegister( |
885 | MRI, LiveRegs, *TRI.getWaveMaskRegClass()); |
886 | LiveRegs.removeReg(ScratchExecCopy); |
887 | |
888 | const unsigned OrSaveExec = |
889 | ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64; |
890 | |
891 | BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy) |
892 | .addImm(-1); |
893 | } |
894 | |
895 | buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR, |
896 | FuncInfo->getScratchRSrcReg(), |
897 | FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue()); |
898 | } |
899 | |
900 | if (ScratchExecCopy != AMDGPU::NoRegister) { |
901 | // FIXME: Split block and make terminator. |
902 | unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; |
903 | unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
904 | BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) |
905 | .addReg(ScratchExecCopy, RegState::Kill); |
906 | } |
907 | } |
908 | |
909 | // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not |
910 | // memory. They should have been removed by now. |
911 | static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) { |
912 | for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); |
913 | I != E; ++I) { |
914 | if (!MFI.isDeadObjectIndex(I)) |
915 | return false; |
916 | } |
917 | |
918 | return true; |
919 | } |
920 | |
921 | #ifndef NDEBUG |
922 | static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI, |
923 | Optional<int> FramePointerSaveIndex) { |
924 | for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); |
925 | I != E; ++I) { |
926 | if (!MFI.isDeadObjectIndex(I) && |
927 | MFI.getStackID(I) == TargetStackID::SGPRSpill && |
928 | FramePointerSaveIndex && I != FramePointerSaveIndex) { |
929 | return false; |
930 | } |
931 | } |
932 | |
933 | return true; |
934 | } |
935 | #endif |
936 | |
937 | int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, |
938 | unsigned &FrameReg) const { |
939 | const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); |
940 | |
941 | FrameReg = RI->getFrameRegister(MF); |
942 | return MF.getFrameInfo().getObjectOffset(FI); |
943 | } |
944 | |
945 | void SIFrameLowering::processFunctionBeforeFrameFinalized( |
946 | MachineFunction &MF, |
947 | RegScavenger *RS) const { |
948 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
949 | |
950 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
951 | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
952 | SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); |
953 | |
954 | FuncInfo->removeDeadFrameIndices(MFI); |
955 | assert(allSGPRSpillsAreDead(MFI, None) && |
956 | "SGPR spill should have been removed in SILowerSGPRSpills"); |
957 | |
958 | // FIXME: The other checks should be redundant with allStackObjectsAreDead, |
959 | // but currently hasNonSpillStackObjects is set only from source |
960 | // allocas. Stack temps produced from legalization are not counted currently. |
961 | if (!allStackObjectsAreDead(MFI)) { |
962 | assert(RS && "RegScavenger required if spilling")((RS && "RegScavenger required if spilling") ? static_cast <void> (0) : __assert_fail ("RS && \"RegScavenger required if spilling\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp" , 962, __PRETTY_FUNCTION__)); |
963 | |
964 | if (FuncInfo->isEntryFunction()) { |
965 | int ScavengeFI = MFI.CreateFixedObject( |
966 | TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false); |
967 | RS->addScavengingFrameIndex(ScavengeFI); |
968 | } else { |
969 | int ScavengeFI = MFI.CreateStackObject( |
970 | TRI->getSpillSize(AMDGPU::SGPR_32RegClass), |
971 | TRI->getSpillAlignment(AMDGPU::SGPR_32RegClass), |
972 | false); |
973 | RS->addScavengingFrameIndex(ScavengeFI); |
974 | } |
975 | } |
976 | } |
977 | |
978 | // Only report VGPRs to generic code. |
979 | void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, |
980 | BitVector &SavedVGPRs, |
981 | RegScavenger *RS) const { |
982 | TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS); |
983 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
984 | if (MFI->isEntryFunction()) |
985 | return; |
986 | |
987 | const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); |
988 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
989 | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
990 | |
991 | // Ignore the SGPRs the default implementation found. |
992 | SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask()); |
993 | |
994 | // hasFP only knows about stack objects that already exist. We're now |
995 | // determining the stack slots that will be created, so we have to predict |
996 | // them. Stack objects force FP usage with calls. |
997 | // |
998 | // Note a new VGPR CSR may be introduced if one is used for the spill, but we |
999 | // don't want to report it here. |
1000 | // |
1001 | // FIXME: Is this really hasReservedCallFrame? |
1002 | const bool WillHaveFP = |
1003 | FrameInfo.hasCalls() && |
1004 | (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo)); |
1005 | |
1006 | // VGPRs used for SGPR spilling need to be specially inserted in the prolog, |
1007 | // so don't allow the default insertion to handle them. |
1008 | for (auto SSpill : MFI->getSGPRSpillVGPRs()) |
1009 | SavedVGPRs.reset(SSpill.VGPR); |
1010 | |
1011 | const bool HasFP = WillHaveFP || hasFP(MF); |
1012 | if (!HasFP) |
1013 | return; |
1014 | |
1015 | if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) { |
1016 | int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr, |
1017 | TargetStackID::SGPRSpill); |
1018 | |
1019 | // If there is already a VGPR with free lanes, use it. We may already have |
1020 | // to pay the penalty for spilling a CSR VGPR. |
1021 | if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI)) |
1022 | llvm_unreachable("allocate SGPR spill should have worked")::llvm::llvm_unreachable_internal("allocate SGPR spill should have worked" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp" , 1022); |
1023 | |
1024 | MFI->FramePointerSaveIndex = NewFI; |
1025 | |
1026 | LLVM_DEBUG( |
1027 | auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front(); |
1028 | dbgs() << "Spilling FP to " << printReg(Spill.VGPR, TRI) |
1029 | << ':' << Spill.Lane << '\n'); |
1030 | return; |
1031 | } |
1032 | |
1033 | MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo()); |
1034 | |
1035 | if (!MFI->SGPRForFPSaveRestoreCopy) { |
1036 | // There's no free lane to spill, and no free register to save FP, so we're |
1037 | // forced to spill another VGPR to use for the spill. |
1038 | int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr, |
1039 | TargetStackID::SGPRSpill); |
1040 | if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI)) |
1041 | llvm_unreachable("allocate SGPR spill should have worked")::llvm::llvm_unreachable_internal("allocate SGPR spill should have worked" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp" , 1041); |
1042 | MFI->FramePointerSaveIndex = NewFI; |
1043 | |
1044 | LLVM_DEBUG( |
1045 | auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front(); |
1046 | dbgs() << "FP requires fallback spill to " << printReg(Spill.VGPR, TRI) |
1047 | << ':' << Spill.Lane << '\n'); |
1048 | } else { |
1049 | LLVM_DEBUG(dbgs() << "Saving FP with copy to " << |
1050 | printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n'); |
1051 | } |
1052 | } |
1053 | |
1054 | void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF, |
1055 | BitVector &SavedRegs, |
1056 | RegScavenger *RS) const { |
1057 | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); |
1058 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
1059 | if (MFI->isEntryFunction()) |
1060 | return; |
1061 | |
1062 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
1063 | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
1064 | |
1065 | // The SP is specifically managed and we don't want extra spills of it. |
1066 | SavedRegs.reset(MFI->getStackPtrOffsetReg()); |
1067 | SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask()); |
1068 | } |
1069 | |
1070 | bool SIFrameLowering::assignCalleeSavedSpillSlots( |
1071 | MachineFunction &MF, const TargetRegisterInfo *TRI, |
1072 | std::vector<CalleeSavedInfo> &CSI) const { |
1073 | if (CSI.empty()) |
1074 | return true; // Early exit if no callee saved registers are modified! |
1075 | |
1076 | const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); |
1077 | if (!FuncInfo->SGPRForFPSaveRestoreCopy) |
1078 | return false; |
1079 | |
1080 | for (auto &CS : CSI) { |
1081 | if (CS.getReg() == FuncInfo->getFrameOffsetReg()) { |
1082 | if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) |
1083 | CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy); |
1084 | break; |
1085 | } |
1086 | } |
1087 | |
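| // Returning false lets the generic code assign spill slots for the |
| // remaining CSRs; only the FP save is redirected to the reserved SGPR here. |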
1088 | return false; |
1089 | } |
1090 | |
1091 | MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr( |
1092 | MachineFunction &MF, |
1093 | MachineBasicBlock &MBB, |
1094 | MachineBasicBlock::iterator I) const { |
1095 | int64_t Amount = I->getOperand(0).getImm(); |
1096 | if (Amount == 0) |
1097 | return MBB.erase(I); |
1098 | |
1099 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
1100 | const SIInstrInfo *TII = ST.getInstrInfo(); |
1101 | const DebugLoc &DL = I->getDebugLoc(); |
1102 | unsigned Opc = I->getOpcode(); |
1103 | bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); |
1104 | uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0; |
1105 | |
1106 | if (!hasReservedCallFrame(MF)) { |
1107 | unsigned Align = getStackAlignment(); |
1108 | |
1109 | Amount = alignTo(Amount, Align); |
1110 | assert(isUInt<32>(Amount) && "exceeded stack address space size"); |
1111 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
1112 | unsigned SPReg = MFI->getStackPtrOffsetReg(); |
1113 | |
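| // As in the prologue, the SP tracks a whole-wave byte offset, so the |
| // per-lane adjustment is scaled by the wavefront size. |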
1114 | unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32; |
1115 | BuildMI(MBB, I, DL, TII->get(Op), SPReg) |
1116 | .addReg(SPReg) |
1117 | .addImm(Amount * ST.getWavefrontSize()); |
1118 | } else if (CalleePopAmount != 0) { |
1119 | llvm_unreachable("is this used?")::llvm::llvm_unreachable_internal("is this used?", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp" , 1119); |
1120 | } |
1121 | |
1122 | return MBB.erase(I); |
1123 | } |
1124 | |
1125 | bool SIFrameLowering::hasFP(const MachineFunction &MF) const { |
1126 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
1127 | if (MFI.hasCalls()) { |
1128 | // All offsets are unsigned, so need to be addressed in the same direction |
1129 | // as stack growth. |
1130 | |
1131 | // FIXME: This function is pretty broken, since it can be called before the |
1132 | // frame layout is determined or CSR spills are inserted. |
1133 | if (MFI.getStackSize() != 0) |
1134 | return true; |
1135 | |
1136 | // For the entry point, the input wave scratch offset must be copied to the |
1137 | // API SP if there are calls. |
1138 | if (MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) |
1139 | return true; |
1140 | } |
1141 | |
1142 | return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || |
1143 | MFI.hasStackMap() || MFI.hasPatchPoint() || |
1144 | MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) || |
1145 | MF.getTarget().Options.DisableFramePointerElim(MF); |
1146 | } |