/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Bug Summary

File:	llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
Warning:	line 945, column 8 Although the value stored to 'HasBP' is used in the enclosing expression, the value is never actually read from 'HasBP'

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SIFrameLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AMDGPU -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem 
/usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AMDGPU -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

1	//===----------------------- SIFrameLowering.cpp --------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//==-----------------------------------------------------------------------===//
8
9	#include "SIFrameLowering.h"
10	#include "AMDGPU.h"
11	#include "GCNSubtarget.h"
12	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13	#include "SIMachineFunctionInfo.h"
14	#include "llvm/CodeGen/LivePhysRegs.h"
15	#include "llvm/CodeGen/MachineFrameInfo.h"
16	#include "llvm/CodeGen/RegisterScavenging.h"
17	#include "llvm/Target/TargetMachine.h"
18
19	using namespace llvm;
20
// Debug-output category name for this file (by LLVM convention this is the
// name LLVM_DEBUG output is filtered on).
21	#define DEBUG_TYPE "frame-info"
22
// Hidden boolean command-line option, initialised to true, whose description
// reads "Enable spilling VGPRs to AGPRs".
23	static cl::opt<bool> EnableSpillVGPRToAGPR(
24	"amdgpu-spill-vgpr-to-agpr",
25	cl::desc("Enable spilling VGPRs to AGPRs"),
26	cl::ReallyHidden,
27	cl::init(true));
28
29	// Find a scratch register that we can use in the prologue. We avoid using
30	// callee-save registers since they may appear to be free when this is called
31	// from canUseAsPrologue (during shrink wrapping), but then no longer be free
32	// when this is called from emitPrologue.
//
// \p MRI      supplies the callee-saved register list and answers the
//             "is this register used / available" queries.
// \p LiveRegs is modified: every callee-saved register is added to it and
//             remains in the set after this function returns.
// \p RC       register class that is scanned in its iteration order; the first
//             acceptable register is returned.
// \p Unused   when true, a candidate must additionally be reported by MRI as
//             not used anywhere (isPhysRegUsed is false).
// \returns the chosen register, or a default-constructed MCRegister when no
//          register in \p RC qualifies.
33	static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
34	LivePhysRegs &LiveRegs,
35	const TargetRegisterClass &RC,
36	bool Unused = false) {
37	// Mark callee saved registers as used so we will not choose them.
// The list is walked until its terminating zero entry.
38	const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
39	for (unsigned i = 0; CSRegs[i]; ++i)
40	LiveRegs.addReg(CSRegs[i]);
41
42	if (Unused) {
43	// We are looking for a register that can be used throughout the entire
44	// function, so any use is unacceptable.
45	for (MCRegister Reg : RC) {
46	if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
47	return Reg;
48	}
49	} else {
// Only availability with respect to LiveRegs is required here.
50	for (MCRegister Reg : RC) {
51	if (LiveRegs.available(MRI, Reg))
52	return Reg;
53	}
54	}
55
// Nothing suitable was found.
56	return MCRegister();
57	}
58
// Decide where the current FP or BP will be preserved and report the decision
// through the out-parameters.
//
// \p LiveRegs   is forwarded to findScratchNonCalleeSaveRegister in step 2.
// \p TempSGPR   is assigned in step 2 with the result of the scratch-register
//               search; it is not touched when step 1 succeeds.
// \p FrameIndex is set in steps 1, 3 and 4 to the stack object that will hold
//               the saved value; it is left untouched when step 2 finds a
//               register.
// \p IsFP       only selects "FP" or "BP" in the debug messages.
//
// The strategies are tried in the numbered order of the comments in the body.
59	static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
60	LivePhysRegs &LiveRegs,
61	Register &TempSGPR,
62	Optional<int> &FrameIndex,
63	bool IsFP) {
64	SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
65	MachineFrameInfo &FrameInfo = MF.getFrameInfo();
66
67	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
68	const SIRegisterInfo *TRI = ST.getRegisterInfo();
69
70	// We need to save and restore the current FP/BP.
71
72	// 1: If there is already a VGPR with free lanes, use it. We
73	// may already have to pay the penalty for spilling a CSR VGPR.
74	if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
75	int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
76	TargetStackID::SGPRSpill);
77
// A free lane was just reported, so failing to allocate is treated as
// impossible.
78	if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
79	llvm_unreachable("allocate SGPR spill should have worked");
80
81	FrameIndex = NewFI;
82
83	LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
84	dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
85	<< printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
86	<< '\n');
87	return;
88	}
89
90	// 2: Next, try to save the FP/BP in an unused SGPR.
// The final 'true' asks for a register that is unused in the whole function.
91	TempSGPR = findScratchNonCalleeSaveRegister(
92	MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
93
94	if (!TempSGPR) {
95	int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
96	TargetStackID::SGPRSpill);
97
98	if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
99	// 3: There's no free lane to spill, and no free register to save FP/BP,
100	// so we're forced to spill another VGPR to use for the spill.
101	FrameIndex = NewFI;
102
103	LLVM_DEBUG(
104	auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
105	dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
106	<< printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
107	} else {
108	// Remove dead <NewFI> index
109	MF.getFrameInfo().RemoveStackObject(NewFI);
110	// 4: If all else fails, spill the FP/BP to memory.
111	FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
112	LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
113	<< (IsFP ? "FP" : "BP") << '\n');
114	}
115	} else {
// Step 2 succeeded: the value will be kept in TempSGPR, no stack object needed.
116	LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
117	<< printReg(TempSGPR, TRI) << '\n');
118	}
119	}
120
121	// We need to specially emit stack operations here because a different frame
122	// register is used than in the rest of the function, as getFrameRegister would
123	// use.
//
// Emits, before \p I in \p MBB, a store of \p SpillReg to frame index \p FI,
// addressed relative to FuncInfo.getStackPtrOffsetReg().
// \p LiveRegs is passed on to buildSpillLoadStore; \p SpillReg is added to it
// for the duration of that call and removed again afterwards.
124	static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
125	const SIMachineFunctionInfo &FuncInfo,
126	LivePhysRegs &LiveRegs, MachineFunction &MF,
127	MachineBasicBlock &MBB,
128	MachineBasicBlock::iterator I, Register SpillReg,
129	int FI) {
// Scratch store when flat scratch is enabled, buffer store otherwise.
130	unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
131	: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
132
// Memory operand describing a store to the fixed stack slot FI, using the
// size and alignment recorded for that frame object.
133	MachineFrameInfo &FrameInfo = MF.getFrameInfo();
134	MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
135	MachineMemOperand *MMO = MF.getMachineMemOperand(
136	PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
137	FrameInfo.getObjectAlign(FI));
// NOTE(review): presumably SpillReg is marked live so the helper does not pick
// it as a temporary while building the store - confirm against
// buildSpillLoadStore.
138	LiveRegs.addReg(SpillReg);
139	TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, true,
140	FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
141	&LiveRegs);
142	LiveRegs.removeReg(SpillReg);
143	}
144
// Counterpart of the prologue spill: emits, before \p I in \p MBB, a load of
// \p SpillReg from frame index \p FI, addressed relative to
// FuncInfo.getStackPtrOffsetReg(). \p LiveRegs is passed on to
// buildSpillLoadStore and is not otherwise modified here.
145	static void buildEpilogRestore(const GCNSubtarget &ST,
146	const SIRegisterInfo &TRI,
147	const SIMachineFunctionInfo &FuncInfo,
148	LivePhysRegs &LiveRegs, MachineFunction &MF,
149	MachineBasicBlock &MBB,
150	MachineBasicBlock::iterator I, Register SpillReg,
151	int FI) {
// Scratch load when flat scratch is enabled, buffer load otherwise.
152	unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
153	: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
154
// Memory operand describing a load from the fixed stack slot FI, using the
// size and alignment recorded for that frame object.
155	MachineFrameInfo &FrameInfo = MF.getFrameInfo();
156	MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
157	MachineMemOperand *MMO = MF.getMachineMemOperand(
158	PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
159	FrameInfo.getObjectAlign(FI));
160	TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, false,
161	FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
162	&LiveRegs);
163	}
164
// Emits, before \p I in \p MBB, the instructions that form the GIT pointer in
// the 64-bit register \p TargetReg (its sub0/sub1 halves are written
// separately). \p DL is attached to every emitted instruction.
165	static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
166	const DebugLoc &DL, const SIInstrInfo *TII,
167	Register TargetReg) {
168	MachineFunction *MF = MBB.getParent();
169	const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
170	const SIRegisterInfo *TRI = &TII->getRegisterInfo();
171	const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
172	Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
173	Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
174
// High half: use the value from getGITPtrHigh() unless it is 0xffffffff; in
// that case write the program counter into the whole 64-bit register instead
// (the low half is overwritten below either way).
175	if (MFI->getGITPtrHigh() != 0xffffffff) {
176	BuildMI(MBB, I, DL, SMovB32, TargetHi)
177	.addImm(MFI->getGITPtrHigh())
178	.addReg(TargetReg, RegState::ImplicitDefine);
179	} else {
180	const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
181	BuildMI(MBB, I, DL, GetPC64, TargetReg);
182	}
// Low half: copied from the register returned by getGITPtrLoReg, which is
// registered as a live-in of both the function and this block before it is
// read.
183	Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
184	MF->getRegInfo().addLiveIn(GitPtrLo);
185	MBB.addLiveIn(GitPtrLo);
186	BuildMI(MBB, I, DL, SMovB32, TargetLo)
187	.addReg(GitPtrLo);
188	}
189
190	// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
//
// All instructions are inserted before \p I in \p MBB with debug location
// \p DL. \p ScratchWaveOffsetReg is added to the flat scratch base.
//
// The function first obtains the two 32-bit halves of the flat scratch init
// value (loaded through the GIT pointer on amdpal, taken from the preloaded
// FLAT_SCRATCH_INIT register otherwise) and then programs the flat scratch
// registers in one of three ways, selected by ST.flatScratchIsPointer() and
// the subtarget generation.
191	void SIFrameLowering::emitEntryFunctionFlatScratchInit(
192	MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
193	const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
194	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
195	const SIInstrInfo *TII = ST.getInstrInfo();
196	const SIRegisterInfo *TRI = &TII->getRegisterInfo();
197	const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
198
199	// We don't need this if we only have spills since there is no user facing
200	// scratch.
201
202	// TODO: If we know we don't have flat instructions earlier, we can omit
203	// this from the input registers.
204	//
205	// TODO: We only need to know if we access scratch space through a flat
206	// pointer. Because we only detect if flat instructions are used at all,
207	// this will be used more often than necessary on VI.
208
209	Register FlatScrInitLo;
210	Register FlatScrInitHi;
211
212	if (ST.isAmdPalOS()) {
213	// Extract the scratch offset from the descriptor in the GIT
214	LivePhysRegs LiveRegs;
215	LiveRegs.init(*TRI);
216	LiveRegs.addLiveIns(MBB);
217
218	// Find unused reg to load flat scratch init into
// Candidates are the 64-bit SGPRs after those covering the preloaded SGPRs;
// the slice start is clamped so it never exceeds the list size.
219	MachineRegisterInfo &MRI = MF.getRegInfo();
220	Register FlatScrInit = AMDGPU::NoRegister;
221	ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
222	unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
223	AllSGPR64s = AllSGPR64s.slice(
224	std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
225	Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
// Take the first pair that is available, allocatable and does not contain the
// GIT pointer low register.
226	for (MCPhysReg Reg : AllSGPR64s) {
227	if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
228	!TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
229	FlatScrInit = Reg;
230	break;
231	}
232	}
233	assert(FlatScrInit && "Failed to find free register for scratch init");
234
235	FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
236	FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
237
238	buildGitPtr(MBB, I, DL, TII, FlatScrInit);
239
240	// We now have the GIT ptr - now get the scratch descriptor from the entry
241	// at offset 0 (or offset 16 for a compute shader).
242	MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
243	const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
244	auto *MMO = MF.getMachineMemOperand(
245	PtrInfo,
246	MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
247	MachineMemOperand::MODereferenceable,
248	8, Align(4));
249	unsigned Offset =
250	MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
251	const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
252	unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
// The load overwrites the register pair that currently holds the GIT pointer.
253	BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
254	.addReg(FlatScrInit)
255	.addImm(EncodedOffset) // offset
256	.addImm(0) // cpol
257	.addMemOperand(MMO);
258
259	// Mask the offset in [47:0] of the descriptor
260	const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
261	BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
262	.addReg(FlatScrInitHi)
263	.addImm(0xffff);
264	} else {
// Not amdpal: the init value arrives in a preloaded register, which has to be
// registered as live-in for the function and for this block.
265	Register FlatScratchInitReg =
266	MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
267	assert(FlatScratchInitReg);
268
269	MachineRegisterInfo &MRI = MF.getRegInfo();
270	MRI.addLiveIn(FlatScratchInitReg);
271	MBB.addLiveIn(FlatScratchInitReg);
272
273	FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
274	FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
275	}
276
277	// Do a 64-bit pointer add.
278	if (ST.flatScratchIsPointer()) {
279	if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
// GFX10 and later: do the add in place, then write both halves to the
// FLAT_SCR_LO / FLAT_SCR_HI hardware registers with S_SETREG_B32.
280	BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
281	.addReg(FlatScrInitLo)
282	.addReg(ScratchWaveOffsetReg);
283	BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
284	.addReg(FlatScrInitHi)
285	.addImm(0);
286	BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
287	addReg(FlatScrInitLo).
288	addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
289	(31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
290	BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
291	addReg(FlatScrInitHi).
292	addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
293	(31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
294	return;
295	}
296
297	// For GFX9.
// The add with carry writes FLAT_SCR_LO / FLAT_SCR_HI directly.
298	BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
299	.addReg(FlatScrInitLo)
300	.addReg(ScratchWaveOffsetReg);
301	BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
302	.addReg(FlatScrInitHi)
303	.addImm(0);
304
305	return;
306	}
307
// Remaining case: flat scratch is not a pointer, which is only expected
// before GFX9.
308	assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
309
310	// Copy the size in bytes.
311	BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
312	.addReg(FlatScrInitHi, RegState::Kill);
313
314	// Add wave offset in bytes to private base offset.
315	// See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
316	BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
317	.addReg(FlatScrInitLo)
318	.addReg(ScratchWaveOffsetReg);
319
320	// Convert offset to 256-byte units.
321	BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
322	.addReg(FlatScrInitLo, RegState::Kill)
323	.addImm(8);
324	}
325
326	// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
327	// memory. They should have been removed by now.
//
// \returns true when every object index in [getObjectIndexBegin(),
// getObjectIndexEnd()) is reported dead by \p MFI; this is also the result
// when that range is empty.
328	static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
329	for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
330	I != E; ++I) {
// A single live object is enough to answer "no".
331	if (!MFI.isDeadObjectIndex(I))
332	return false;
333	}
334
335	return true;
336	}
337
338	// Shift down registers reserved for the scratch RSRC.
//
// \returns
//  - an empty Register when no scratch RSRC register is set, or when it is
//    unused and all stack objects are dead;
//  - the current scratch RSRC register unchanged when the subtarget has the
//    SGPR init bug, when the register is not the one handed out by
//    reservedPrivateSegmentBufferReg, or when no replacement is found;
//  - otherwise the replacement 128-bit SGPR tuple, after every use of the old
//    register has been rewritten and the function info has been updated.
339	Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
340	MachineFunction &MF) const {
341
342	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
343	const SIInstrInfo *TII = ST.getInstrInfo();
344	const SIRegisterInfo *TRI = &TII->getRegisterInfo();
345	MachineRegisterInfo &MRI = MF.getRegInfo();
346	SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
347
348	assert(MFI->isEntryFunction());
349
350	Register ScratchRsrcReg = MFI->getScratchRSrcReg();
351
// Nothing to reserve if there is no register, or nobody uses it and no stack
// object is alive.
352	if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
353	allStackObjectsAreDead(MF.getFrameInfo())))
354	return Register();
355
// Keep the register as it is in the cases where shifting is not attempted.
356	if (ST.hasSGPRInitBug() ||
357	ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
358	return ScratchRsrcReg;
359
360	// We reserved the last registers for this. Shift it down to the end of those
361	// which were actually used.
362	//
363	// FIXME: It might be safer to use a pseudoregister before replacement.
364
365	// FIXME: We should be able to eliminate unused input registers. We only
366	// cannot do this for the resources required for scratch access. For now we
367	// skip over user SGPRs and may leave unused holes.
368
// Skip the 128-bit tuples that cover the preloaded SGPRs; the slice start is
// clamped so it never exceeds the list size.
369	unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
370	ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
371	AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
372
373	// Skip the last N reserved elements because they should have already been
374	// reserved for VCC etc.
375	Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
376	for (MCPhysReg Reg : AllSGPR128s) {
377	// Pick the first unallocated one. Make sure we don't clobber the other
378	// reserved input we needed. Also for PAL, make sure we don't clobber
379	// the GIT pointer passed in SGPR0 or SGPR8.
380	if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
381	!TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
382	MRI.replaceRegWith(ScratchRsrcReg, Reg);
383	MFI->setScratchRSrcReg(Reg);
384	return Reg;
385	}
386	}
387
// No better tuple was found; keep the originally reserved one.
388	return ScratchRsrcReg;
389	}
390
// Multiplier applied to stack sizes before they are written to the stack
// pointer: 1 when flat scratch is enabled, the wavefront size otherwise.
391	static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
392	return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
393	}
394
// Emits the prologue of an entry function at the start of \p MBB, which must
// be the first block of \p MF. In order, it:
//  1. returns early if there is no preloaded scratch wave offset register;
//  2. picks the scratch RSRC register (only when flat scratch is disabled) and
//     marks it live-in for every other block;
//  3. re-adds the preloaded scratch RSRC register as live-in when needed;
//  4. moves the scratch wave offset to a free SGPR if the chosen RSRC register
//     overlaps it;
//  5. initialises the stack pointer and frame pointer when they are required;
//  6. emits flat scratch init and scratch RSRC setup when they are needed.
395	void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
396	MachineBasicBlock &MBB) const {
397	assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
398
399	// FIXME: If we only have SGPR spills, we won't actually be using scratch
400	// memory since these spill to VGPRs. We should be cleaning up these unused
401	// SGPR spill frame indices somewhere.
402
403	// FIXME: We still have implicit uses on SGPR spill instructions in case they
404	// need to spill to vector memory. It's likely that will not happen, but at
405	// this point it appears we need the setup. This part of the prolog should be
406	// emitted after frame indices are eliminated.
407
408	// FIXME: Remove all of the isPhysRegUsed checks
409
410	SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
411	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
412	const SIInstrInfo *TII = ST.getInstrInfo();
413	const SIRegisterInfo *TRI = &TII->getRegisterInfo();
414	MachineRegisterInfo &MRI = MF.getRegInfo();
415	const Function &F = MF.getFunction();
416	MachineFrameInfo &FrameInfo = MF.getFrameInfo();
417
418	assert(MFI->isEntryFunction());
419
420	Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
421	AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
422	// FIXME: Hack to not crash in situations which emitted an error.
423	if (!PreloadedScratchWaveOffsetReg)
424	return;
425
426	// We need to do the replacement of the private segment buffer register even
427	// if there are no stack objects. There could be stores to undef or a
428	// constant without an associated object.
429	//
430	// This will return `Register()` in cases where there are no actual
431	// uses of the SRSRC.
432	Register ScratchRsrcReg;
433	if (!ST.enableFlatScratch())
434	ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
435
436	// Make the selected register live throughout the function.
// The entry block itself is skipped.
437	if (ScratchRsrcReg) {
438	for (MachineBasicBlock &OtherBB : MF) {
439	if (&OtherBB != &MBB) {
440	OtherBB.addLiveIn(ScratchRsrcReg);
441	}
442	}
443	}
444
445	// Now that we have fixed the reserved SRSRC we need to locate the
446	// (potentially) preloaded SRSRC.
447	Register PreloadedScratchRsrcReg;
448	if (ST.isAmdHsaOrMesa(F)) {
449	PreloadedScratchRsrcReg =
450	MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
451	if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
452	// We added live-ins during argument lowering, but since they were not
453	// used they were deleted. We're adding the uses now, so add them back.
454	MRI.addLiveIn(PreloadedScratchRsrcReg);
455	MBB.addLiveIn(PreloadedScratchRsrcReg);
456	}
457	}
458
459	// Debug location must be unknown since the first debug location is used to
460	// determine the end of the prologue.
461	DebugLoc DL;
462	MachineBasicBlock::iterator I = MBB.begin();
463
464	// We found the SRSRC first because it needs four registers and has an
465	// alignment requirement. If the SRSRC that we found is clobbering with
466	// the scratch wave offset, which may be in a fixed SGPR or a free SGPR
467	// chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
468	// wave offset to a free SGPR.
469	Register ScratchWaveOffsetReg;
470	if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
// Search the 32-bit SGPRs after the preloaded ones; the slice start is
// clamped so it never exceeds the list size.
471	ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
472	unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
473	AllSGPRs = AllSGPRs.slice(
474	std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
475	Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
476	for (MCPhysReg Reg : AllSGPRs) {
477	if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
478	!TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
479	ScratchWaveOffsetReg = Reg;
480	BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
481	.addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
482	break;
483	}
484	}
485	} else {
486	ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
487	}
// If the search above found nothing the register is still empty here.
488	assert(ScratchWaveOffsetReg);
489
// The stack pointer starts at the frame's stack size, scaled by
// getScratchScaleFactor.
490	if (requiresStackPointerReference(MF)) {
491	Register SPReg = MFI->getStackPtrOffsetReg();
492	assert(SPReg != AMDGPU::SP_REG);
493	BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
494	.addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
495	}
496
// The frame pointer of an entry function starts at zero.
497	if (hasFP(MF)) {
498	Register FPReg = MFI->getFrameOffsetReg();
499	assert(FPReg != AMDGPU::FP_REG);
500	BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
501	}
502
// Flat scratch init is needed only when the function has it and FLAT_SCR is
// used, there are calls, or live stack objects are accessed via flat scratch.
503	bool NeedsFlatScratchInit =
504	MFI->hasFlatScratchInit() &&
505	(MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
506	(!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
507
// The wave offset register is read by either setup sequence below, so it must
// be live-in unless flat scratch is architected.
508	if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
509	!ST.flatScratchIsArchitected()) {
510	MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
511	MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
512	}
513
514	if (NeedsFlatScratchInit) {
515	emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
516	}
517
518	if (ScratchRsrcReg) {
519	emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
520	PreloadedScratchRsrcReg,
521	ScratchRsrcReg, ScratchWaveOffsetReg);
522	}
523	}
524
525	// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
//
// All instructions are inserted before \p I in \p MBB with debug location
// \p DL. The 128-bit \p ScratchRsrcReg is filled in one of three ways:
//  - amdpal: loaded through the GIT pointer, with a wave32 fix-up;
//  - Mesa graphics shader or no preloaded RSRC: built from the implicit buffer
//    pointer or from relocation symbols, plus constant words 2 and 3;
//  - amdhsa / Mesa with a preloaded RSRC: copied from
//    \p PreloadedScratchRsrcReg if it is a different register.
// In every case \p ScratchWaveOffsetReg is then added to the low 64 bits.
526	void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
527	MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
528	const DebugLoc &DL, Register PreloadedScratchRsrcReg,
529	Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
530
531	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
532	const SIInstrInfo *TII = ST.getInstrInfo();
533	const SIRegisterInfo *TRI = &TII->getRegisterInfo();
534	const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
535	const Function &Fn = MF.getFunction();
536
537	if (ST.isAmdPalOS()) {
538	// The pointer to the GIT is formed from the offset passed in and either
539	// the amdgpu-git-ptr-high function attribute or the top part of the PC
540	Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
541	Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
542
543	buildGitPtr(MBB, I, DL, TII, Rsrc01);
544
545	// We now have the GIT ptr - now get the scratch descriptor from the entry
546	// at offset 0 (or offset 16 for a compute shader).
547	MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
548	const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
549	auto MMO = MF.getMachineMemOperand(PtrInfo,
550	MachineMemOperand::MOLoad |
551	MachineMemOperand::MOInvariant |
552	MachineMemOperand::MODereferenceable,
553	16, Align(4));
554	unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
555	const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
556	unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
// The load overwrites the whole tuple, including the GIT pointer in its low
// half.
557	BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
558	.addReg(Rsrc01)
559	.addImm(EncodedOffset) // offset
560	.addImm(0) // cpol
561	.addReg(ScratchRsrcReg, RegState::ImplicitDefine)
562	.addMemOperand(MMO);
563
564	// The driver will always set the SRD for wave 64 (bits 118:117 of
565	// descriptor / bits 22:21 of third sub-reg will be 0b11)
566	// If the shader is actually wave32 we have to modify the const_index_stride
567	// field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
568	// reason the driver does this is that there can be cases where it presents
569	// 2 shaders with different wave size (e.g. VsFs).
570	// TODO: convert to using SCRATCH instructions or multiple SRD buffers
571	if (ST.isWave32()) {
572	const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
573	BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
574	.addImm(21)
575	.addReg(Rsrc03);
576	}
577	} else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
578	assert(!ST.isAmdHsaOrMesa(Fn));
579	const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
580
581	Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
582	Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
583
584	// Use relocations to get the pointer, and setup the other bits manually.
585	uint64_t Rsrc23 = TII->getScratchRsrcWords23();
586
587	if (MFI->hasImplicitBufferPtr()) {
588	Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
589
// Compute calling conventions move the implicit buffer pointer SGPR pair
// directly; other conventions load two dwords through it.
590	if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
591	const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
592
593	BuildMI(MBB, I, DL, Mov64, Rsrc01)
594	.addReg(MFI->getImplicitBufferPtrUserSGPR())
595	.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
596	} else {
597	const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
598
599	MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
600	auto MMO = MF.getMachineMemOperand(
601	PtrInfo,
602	MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
603	MachineMemOperand::MODereferenceable,
604	8, Align(4));
605	BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
606	.addReg(MFI->getImplicitBufferPtrUserSGPR())
607	.addImm(0) // offset
608	.addImm(0) // cpol
609	.addMemOperand(MMO)
610	.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
611
612	MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
613	MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
614	}
615	} else {
// No implicit buffer pointer: words 0 and 1 come from external symbols.
616	Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
617	Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
618
619	BuildMI(MBB, I, DL, SMovB32, Rsrc0)
620	.addExternalSymbol("SCRATCH_RSRC_DWORD0")
621	.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
622
623	BuildMI(MBB, I, DL, SMovB32, Rsrc1)
624	.addExternalSymbol("SCRATCH_RSRC_DWORD1")
625	.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
626
627	}
628
// Words 2 and 3 are the low and high 32 bits of the constant returned by
// getScratchRsrcWords23().
629	BuildMI(MBB, I, DL, SMovB32, Rsrc2)
630	.addImm(Rsrc23 & 0xffffffff)
631	.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
632
633	BuildMI(MBB, I, DL, SMovB32, Rsrc3)
634	.addImm(Rsrc23 >> 32)
635	.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
636	} else if (ST.isAmdHsaOrMesa(Fn)) {
637	assert(PreloadedScratchRsrcReg);
638
// A copy is only needed when the preloaded descriptor is in another register.
639	if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
640	BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
641	.addReg(PreloadedScratchRsrcReg, RegState::Kill);
642	}
643	}
644
645	// Add the scratch wave offset into the scratch RSRC.
646	//
647	// We only want to update the first 48 bits, which is the base address
648	// pointer, without touching the adjacent 16 bits of flags. We know this add
649	// cannot carry-out from bit 47, otherwise the scratch allocation would be
650	// impossible to fit in the 48-bit global address space.
651	//
652	// TODO: Evaluate if it is better to just construct an SRD using the flat
653	// scratch init and some constants rather than update the one we are passed.
654	Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
655	Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
656
657	// We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
658	// the kernel body via inreg arguments.
659	BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
660	.addReg(ScratchRsrcSub0)
661	.addReg(ScratchWaveOffsetReg)
662	.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
663	BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
664	.addReg(ScratchRsrcSub1)
665	.addImm(0)
666	.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
667	}
668
// Reports which stack IDs this target accepts: Default, NoAlloc and SGPRSpill
// are supported; ScalableVector and WasmLocal are not. Any other value is
// treated as unreachable.
669	bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
670	switch (ID) {
671	case TargetStackID::Default:
672	case TargetStackID::NoAlloc:
673	case TargetStackID::SGPRSpill:
674	return true;
675	case TargetStackID::ScalableVector:
676	case TargetStackID::WasmLocal:
677	return false;
678	}
// Only reached when ID matches none of the cases above.
679	llvm_unreachable("Invalid TargetStackID::Value");
680	}
681
// Initialises \p LiveRegs the first time it is needed; does nothing when the
// set is already non-empty.
// For a prologue (\p IsProlog true) the set starts from the live-ins of
// \p MBB. For an epilogue it starts from the live-outs of \p MBB and is then
// stepped backward over the instruction at \p MBBI.
// \p FuncInfo and \p MF are not read in this body.
682	static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
683	const SIMachineFunctionInfo *FuncInfo,
684	MachineFunction &MF, MachineBasicBlock &MBB,
685	MachineBasicBlock::iterator MBBI, bool IsProlog) {
686	if (LiveRegs.empty()) {
687	LiveRegs.init(TRI);
688	if (IsProlog) {
689	LiveRegs.addLiveIns(MBB);
690	} else {
691	// In epilog.
692	LiveRegs.addLiveOuts(MBB);
693	LiveRegs.stepBackward(*MBBI);
694	}
695	}
696	}
697
698	// Activate all lanes, returns saved exec.
//
// Emits, before \p MBBI in \p MBB, an S_OR_SAVEEXEC (B32 on wave32, B64
// otherwise) with an all-ones immediate whose destination is a scratch
// register, and returns that register.
// \p LiveRegs is initialised on demand via initLiveRegs (\p IsProlog selects
// how) and the chosen scratch register is added to it. The scratch search
// also adds all callee-saved registers to \p LiveRegs.
// Aborts with report_fatal_error when no scratch register is found.
699	static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
700	MachineFunction &MF,
701	MachineBasicBlock &MBB,
702	MachineBasicBlock::iterator MBBI,
703	bool IsProlog) {
704	Register ScratchExecCopy;
705	MachineRegisterInfo &MRI = MF.getRegInfo();
706	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
707	const SIInstrInfo *TII = ST.getInstrInfo();
708	const SIRegisterInfo &TRI = TII->getRegisterInfo();
709	SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
// Default-constructed debug location is attached to the emitted instruction.
710	DebugLoc DL;
711
712	initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
713
// Search the wave-mask register class; the 'Unused' argument keeps its
// default of false, so only availability in LiveRegs is required.
714	ScratchExecCopy = findScratchNonCalleeSaveRegister(
715	MRI, LiveRegs, *TRI.getWaveMaskRegClass());
716	if (!ScratchExecCopy)
717	report_fatal_error("failed to find free scratch register");
718
// Record the register as live so later searches on LiveRegs skip it.
719	LiveRegs.addReg(ScratchExecCopy);
720
721	const unsigned OrSaveExec =
722	ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
723	BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);
724
725	return ScratchExecCopy;
726	}
727
728	// A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
729	// Otherwise we are spilling to memory.
730	static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) {
731	const MachineFrameInfo &MFI = MF.getFrameInfo();
732	return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill;
733	}
734
735	void SIFrameLowering::emitPrologue(MachineFunction &MF,
736	MachineBasicBlock &MBB) const {
737	SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
738	if (FuncInfo->isEntryFunction()) {
739	emitEntryFunctionPrologue(MF, MBB);
740	return;
741	}
742
743	const MachineFrameInfo &MFI = MF.getFrameInfo();
744	MachineRegisterInfo &MRI = MF.getRegInfo();
745	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
746	const SIInstrInfo *TII = ST.getInstrInfo();
747	const SIRegisterInfo &TRI = TII->getRegisterInfo();
748
749	Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
750	Register FramePtrReg = FuncInfo->getFrameOffsetReg();
751	Register BasePtrReg =
752	TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
753	LivePhysRegs LiveRegs;
754
755	MachineBasicBlock::iterator MBBI = MBB.begin();
756	DebugLoc DL;
757
758	bool HasFP = false;
759	bool HasBP = false;
760	uint32_t NumBytes = MFI.getStackSize();
761	uint32_t RoundedSize = NumBytes;
762	// To avoid clobbering VGPRs in lanes that weren't active on function entry,
763	// turn on all lanes before doing the spill to memory.
764	Register ScratchExecCopy;
765
766	Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
767	Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
768
769	// VGPRs used for SGPR->VGPR spills
770	for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
771	FuncInfo->getSGPRSpillVGPRs()) {
772	if (!Reg.FI)
773	continue;
774
775	if (!ScratchExecCopy)
776	ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
777	/IsProlog/ true);
778
779	buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
780	*Reg.FI);
781	}
782
783	// VGPRs used for Whole Wave Mode
784	for (const auto &Reg : FuncInfo->WWMReservedRegs) {
785	auto VGPR = Reg.first;
786	auto FI = Reg.second;
787	if (!FI)
788	continue;
789
790	if (!ScratchExecCopy)
791	ScratchExecCopy =
792	buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /IsProlog/ true);
793
794	buildPrologSpill(ST, TRI, FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, FI);
795	}
796
797	if (ScratchExecCopy) {
798	// FIXME: Split block and make terminator.
799	unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
800	MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
801	BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
802	.addReg(ScratchExecCopy, RegState::Kill);
803	LiveRegs.addReg(ScratchExecCopy);
804	}
805
806	if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) {
807	const int FramePtrFI = *FPSaveIndex;
808	assert(!MFI.isDeadObjectIndex(FramePtrFI))(static_cast<void> (0));
809
810	initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /IsProlog/ true);
811
812	MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
813	MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
814	if (!TmpVGPR)
815	report_fatal_error("failed to find free scratch register");
816
817	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
818	.addReg(FramePtrReg);
819
820	buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
821	FramePtrFI);
822	}
823
824	if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) {
825	const int BasePtrFI = *BPSaveIndex;
826	assert(!MFI.isDeadObjectIndex(BasePtrFI))(static_cast<void> (0));
827
828	initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /IsProlog/ true);
829
830	MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
831	MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
832	if (!TmpVGPR)
833	report_fatal_error("failed to find free scratch register");
834
835	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
836	.addReg(BasePtrReg);
837
838	buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
839	BasePtrFI);
840	}
841
842	// In this case, spill the FP to a reserved VGPR.
843	if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) {
844	const int FramePtrFI = *FPSaveIndex;
845	assert(!MFI.isDeadObjectIndex(FramePtrFI))(static_cast<void> (0));
846
847	assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill)(static_cast<void> (0));
848	ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
849	FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
850	assert(Spill.size() == 1)(static_cast<void> (0));
851
852	// Save FP before setting it up.
853	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
854	.addReg(FramePtrReg)
855	.addImm(Spill[0].Lane)
856	.addReg(Spill[0].VGPR, RegState::Undef);
857	}
858
859	// In this case, spill the BP to a reserved VGPR.
860	if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) {
861	const int BasePtrFI = *BPSaveIndex;
862	assert(!MFI.isDeadObjectIndex(BasePtrFI))(static_cast<void> (0));
863
864	assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill)(static_cast<void> (0));
865	ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
866	FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
867	assert(Spill.size() == 1)(static_cast<void> (0));
868
869	// Save BP before setting it up.
870	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
871	.addReg(BasePtrReg)
872	.addImm(Spill[0].Lane)
873	.addReg(Spill[0].VGPR, RegState::Undef);
874	}
875
876	// Emit the copy if we need an FP, and are using a free SGPR to save it.
877	if (FuncInfo->SGPRForFPSaveRestoreCopy) {
878	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
879	FuncInfo->SGPRForFPSaveRestoreCopy)
880	.addReg(FramePtrReg)
881	.setMIFlag(MachineInstr::FrameSetup);
882	}
883
884	// Emit the copy if we need a BP, and are using a free SGPR to save it.
885	if (FuncInfo->SGPRForBPSaveRestoreCopy) {
886	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
887	FuncInfo->SGPRForBPSaveRestoreCopy)
888	.addReg(BasePtrReg)
889	.setMIFlag(MachineInstr::FrameSetup);
890	}
891
892	// If a copy has been emitted for FP and/or BP, Make the SGPRs
893	// used in the copy instructions live throughout the function.
894	SmallVector<MCPhysReg, 2> TempSGPRs;
895	if (FuncInfo->SGPRForFPSaveRestoreCopy)
896	TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
897
898	if (FuncInfo->SGPRForBPSaveRestoreCopy)
899	TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
900
901	if (!TempSGPRs.empty()) {
902	for (MachineBasicBlock &MBB : MF) {
903	for (MCPhysReg Reg : TempSGPRs)
904	MBB.addLiveIn(Reg);
905
906	MBB.sortUniqueLiveIns();
907	}
908	if (!LiveRegs.empty()) {
909	LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
910	LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
911	}
912	}
913
914	if (TRI.hasStackRealignment(MF)) {
915	HasFP = true;
916	const unsigned Alignment = MFI.getMaxAlign().value();
917
918	RoundedSize += Alignment;
919	if (LiveRegs.empty()) {
920	LiveRegs.init(TRI);
921	LiveRegs.addLiveIns(MBB);
922	}
923
924	// s_add_i32 s33, s32, NumBytes
925	// s_and_b32 s33, s33, 0b111...0000
926	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
927	.addReg(StackPtrReg)
928	.addImm((Alignment - 1) * getScratchScaleFactor(ST))
929	.setMIFlag(MachineInstr::FrameSetup);
930	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
931	.addReg(FramePtrReg, RegState::Kill)
932	.addImm(-Alignment * getScratchScaleFactor(ST))
933	.setMIFlag(MachineInstr::FrameSetup);
934	FuncInfo->setIsStackRealigned(true);
935	} else if ((HasFP = hasFP(MF))) {
936	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
937	.addReg(StackPtrReg)
938	.setMIFlag(MachineInstr::FrameSetup);
939	}
940
941	// If we need a base pointer, set it up here. It's whatever the value of
942	// the stack pointer is at this point. Any variable size objects will be
943	// allocated after this, so we can still use the base pointer to reference
944	// the incoming arguments.
945	if ((HasBP = TRI.hasBasePointer(MF))) {
	Although the value stored to 'HasBP' is used in the enclosing expression, the value is never actually read from 'HasBP'
946	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
947	.addReg(StackPtrReg)
948	.setMIFlag(MachineInstr::FrameSetup);
949	}
950
951	if (HasFP && RoundedSize != 0) {
952	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
953	.addReg(StackPtrReg)
954	.addImm(RoundedSize * getScratchScaleFactor(ST))
955	.setMIFlag(MachineInstr::FrameSetup);
956	}
957
958	assert((!HasFP \|\| (FuncInfo->SGPRForFPSaveRestoreCopy \|\|(static_cast<void> (0))
959	FuncInfo->FramePointerSaveIndex)) &&(static_cast<void> (0))
960	"Needed to save FP but didn't save it anywhere")(static_cast<void> (0));
961
962	// If we allow spilling to AGPRs we may have saved FP but then spill
963	// everything into AGPRs instead of the stack.
964	assert((HasFP \|\| (!FuncInfo->SGPRForFPSaveRestoreCopy &&(static_cast<void> (0))
965	!FuncInfo->FramePointerSaveIndex) \|\|(static_cast<void> (0))
966	EnableSpillVGPRToAGPR) &&(static_cast<void> (0))
967	"Saved FP but didn't need it")(static_cast<void> (0));
968
969	assert((!HasBP \|\| (FuncInfo->SGPRForBPSaveRestoreCopy \|\|(static_cast<void> (0))
970	FuncInfo->BasePointerSaveIndex)) &&(static_cast<void> (0))
971	"Needed to save BP but didn't save it anywhere")(static_cast<void> (0));
972
973	assert((HasBP \|\| (!FuncInfo->SGPRForBPSaveRestoreCopy &&(static_cast<void> (0))
974	!FuncInfo->BasePointerSaveIndex)) &&(static_cast<void> (0))
975	"Saved BP but didn't need it")(static_cast<void> (0));
976	}
977
978	void SIFrameLowering::emitEpilogue(MachineFunction &MF,
979	MachineBasicBlock &MBB) const {
980	const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
981	if (FuncInfo->isEntryFunction())
982	return;
983
984	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
985	const SIInstrInfo *TII = ST.getInstrInfo();
986	MachineRegisterInfo &MRI = MF.getRegInfo();
987	const SIRegisterInfo &TRI = TII->getRegisterInfo();
988	MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
989	LivePhysRegs LiveRegs;
990	DebugLoc DL;
991
992	const MachineFrameInfo &MFI = MF.getFrameInfo();
993	uint32_t NumBytes = MFI.getStackSize();
994	uint32_t RoundedSize = FuncInfo->isStackRealigned()
995	? NumBytes + MFI.getMaxAlign().value()
996	: NumBytes;
997	const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
998	const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
999	const Register BasePtrReg =
1000	TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
1001
1002	Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
1003	Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
1004
1005	if (RoundedSize != 0 && hasFP(MF)) {
1006	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
1007	.addReg(StackPtrReg)
1008	.addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
1009	.setMIFlag(MachineInstr::FrameDestroy);
1010	}
1011
1012	if (FuncInfo->SGPRForFPSaveRestoreCopy) {
1013	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
1014	.addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
1015	.setMIFlag(MachineInstr::FrameDestroy);
1016	}
1017
1018	if (FuncInfo->SGPRForBPSaveRestoreCopy) {
1019	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
1020	.addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
1021	.setMIFlag(MachineInstr::FrameDestroy);
1022	}
1023
1024	if (FPSaveIndex) {
1025	const int FramePtrFI = *FPSaveIndex;
1026	assert(!MFI.isDeadObjectIndex(FramePtrFI))(static_cast<void> (0));
1027	if (spilledToMemory(MF, FramePtrFI)) {
1028	initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /IsProlog/ false);
1029
1030	MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
1031	MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
1032	if (!TmpVGPR)
1033	report_fatal_error("failed to find free scratch register");
1034	buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
1035	FramePtrFI);
1036	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
1037	.addReg(TmpVGPR, RegState::Kill);
1038	} else {
1039	// Reload from VGPR spill.
1040	assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill)(static_cast<void> (0));
1041	ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
1042	FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
1043	assert(Spill.size() == 1)(static_cast<void> (0));
1044	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
1045	.addReg(Spill[0].VGPR)
1046	.addImm(Spill[0].Lane);
1047	}
1048	}
1049
1050	if (BPSaveIndex) {
1051	const int BasePtrFI = *BPSaveIndex;
1052	assert(!MFI.isDeadObjectIndex(BasePtrFI))(static_cast<void> (0));
1053	if (spilledToMemory(MF, BasePtrFI)) {
1054	initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /IsProlog/ false);
1055
1056	MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
1057	MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
1058	if (!TmpVGPR)
1059	report_fatal_error("failed to find free scratch register");
1060	buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
1061	BasePtrFI);
1062	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
1063	.addReg(TmpVGPR, RegState::Kill);
1064	} else {
1065	// Reload from VGPR spill.
1066	assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill)(static_cast<void> (0));
1067	ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
1068	FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
1069	assert(Spill.size() == 1)(static_cast<void> (0));
1070	BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg)
1071	.addReg(Spill[0].VGPR)
1072	.addImm(Spill[0].Lane);
1073	}
1074	}
1075
1076	Register ScratchExecCopy;
1077	for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
1078	FuncInfo->getSGPRSpillVGPRs()) {
1079	if (!Reg.FI)
1080	continue;
1081
1082	if (!ScratchExecCopy)
1083	ScratchExecCopy =
1084	buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /IsProlog/ false);
1085
1086	buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
1087	*Reg.FI);
1088	}
1089
1090	for (const auto &Reg : FuncInfo->WWMReservedRegs) {
1091	auto VGPR = Reg.first;
1092	auto FI = Reg.second;
1093	if (!FI)
1094	continue;
1095
1096	if (!ScratchExecCopy)
1097	ScratchExecCopy =
1098	buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /IsProlog/ false);
1099
1100	buildEpilogRestore(ST, TRI, FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, FI);
1101	}
1102
1103	if (ScratchExecCopy) {
1104	// FIXME: Split block and make terminator.
1105	unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1106	MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
1107	BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
1108	.addReg(ScratchExecCopy, RegState::Kill);
1109	}
1110	}
1111
1112	#ifndef NDEBUG1
1113	static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
1114	const MachineFrameInfo &MFI = MF.getFrameInfo();
1115	const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1116	for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
1117	I != E; ++I) {
1118	if (!MFI.isDeadObjectIndex(I) &&
1119	MFI.getStackID(I) == TargetStackID::SGPRSpill &&
1120	(I != FuncInfo->FramePointerSaveIndex &&
1121	I != FuncInfo->BasePointerSaveIndex)) {
1122	return false;
1123	}
1124	}
1125
1126	return true;
1127	}
1128	#endif
1129
1130	StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1131	int FI,
1132	Register &FrameReg) const {
1133	const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1134
1135	FrameReg = RI->getFrameRegister(MF);
1136	return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
1137	}
1138
1139	void SIFrameLowering::processFunctionBeforeFrameFinalized(
1140	MachineFunction &MF,
1141	RegScavenger *RS) const {
1142	MachineFrameInfo &MFI = MF.getFrameInfo();
1143
1144	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1145	const SIInstrInfo *TII = ST.getInstrInfo();
1146	const SIRegisterInfo *TRI = ST.getRegisterInfo();
1147	MachineRegisterInfo &MRI = MF.getRegInfo();
1148	SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1149
1150	const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
1151	&& EnableSpillVGPRToAGPR;
1152
1153	if (SpillVGPRToAGPR) {
1154	// To track the spill frame indices handled in this pass.
1155	BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
1156
1157	bool SeenDbgInstr = false;
1158
1159	for (MachineBasicBlock &MBB : MF) {
1160	MachineBasicBlock::iterator Next;
1161	for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
1162	MachineInstr &MI = *I;
1163	Next = std::next(I);
1164
1165	if (MI.isDebugInstr())
1166	SeenDbgInstr = true;
1167
1168	if (TII->isVGPRSpill(MI)) {
1169	// Try to eliminate stack used by VGPR spills before frame
1170	// finalization.
1171	unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1172	AMDGPU::OpName::vaddr);
1173	int FI = MI.getOperand(FIOp).getIndex();
1174	Register VReg =
1175	TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
1176	if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
1177	TRI->isAGPR(MRI, VReg))) {
1178	// FIXME: change to enterBasicBlockEnd()
1179	RS->enterBasicBlock(MBB);
1180	TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
1181	SpillFIs.set(FI);
1182	continue;
1183	}
1184	}
1185	}
1186	}
1187
1188	for (MachineBasicBlock &MBB : MF) {
1189	for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
1190	MBB.addLiveIn(Reg);
1191
1192	for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
1193	MBB.addLiveIn(Reg);
1194
1195	MBB.sortUniqueLiveIns();
1196
1197	if (!SpillFIs.empty() && SeenDbgInstr) {
1198	// FIXME: The dead frame indices are replaced with a null register from
1199	// the debug value instructions. We should instead, update it with the
1200	// correct register value. But not sure the register value alone is
1201	for (MachineInstr &MI : MBB) {
1202	if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
1203	SpillFIs[MI.getOperand(0).getIndex()]) {
1204	MI.getOperand(0).ChangeToRegister(Register(), false /isDef/);
1205	MI.getOperand(0).setIsDebug();
1206	}
1207	}
1208	}
1209	}
1210	}
1211
1212	FuncInfo->removeDeadFrameIndices(MFI);
1213	assert(allSGPRSpillsAreDead(MF) &&(static_cast<void> (0))
1214	"SGPR spill should have been removed in SILowerSGPRSpills")(static_cast<void> (0));
1215
1216	// FIXME: The other checks should be redundant with allStackObjectsAreDead,
1217	// but currently hasNonSpillStackObjects is set only from source
1218	// allocas. Stack temps produced from legalization are not counted currently.
1219	if (!allStackObjectsAreDead(MFI)) {
1220	assert(RS && "RegScavenger required if spilling")(static_cast<void> (0));
1221
1222	// Add an emergency spill slot
1223	RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
1224	}
1225	}
1226
1227	// Only report VGPRs to generic code.
1228	void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
1229	BitVector &SavedVGPRs,
1230	RegScavenger *RS) const {
1231	TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
1232	SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1233	if (MFI->isEntryFunction())
1234	return;
1235
1236	MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1237	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1238	const SIRegisterInfo *TRI = ST.getRegisterInfo();
1239
1240	// Ignore the SGPRs the default implementation found.
1241	SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1242
1243	// Do not save AGPRs prior to GFX90A because there was no easy way to do so.
1244	// In gfx908 there was do AGPR loads and stores and thus spilling also
1245	// require a temporary VGPR.
1246	if (!ST.hasGFX90AInsts())
1247	SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1248
1249	// hasFP only knows about stack objects that already exist. We're now
1250	// determining the stack slots that will be created, so we have to predict
1251	// them. Stack objects force FP usage with calls.
1252	//
1253	// Note a new VGPR CSR may be introduced if one is used for the spill, but we
1254	// don't want to report it here.
1255	//
1256	// FIXME: Is this really hasReservedCallFrame?
1257	const bool WillHaveFP =
1258	FrameInfo.hasCalls() &&
1259	(SavedVGPRs.any() \|\| !allStackObjectsAreDead(FrameInfo));
1260
1261	// VGPRs used for SGPR spilling need to be specially inserted in the prolog,
1262	// so don't allow the default insertion to handle them.
1263	for (auto SSpill : MFI->getSGPRSpillVGPRs())
1264	SavedVGPRs.reset(SSpill.VGPR);
1265
1266	LivePhysRegs LiveRegs;
1267	LiveRegs.init(*TRI);
1268
1269	if (WillHaveFP \|\| hasFP(MF)) {
1270	assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex &&(static_cast<void> (0))
1271	"Re-reserving spill slot for FP")(static_cast<void> (0));
1272	getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
1273	MFI->FramePointerSaveIndex, true);
1274	}
1275
1276	if (TRI->hasBasePointer(MF)) {
1277	if (MFI->SGPRForFPSaveRestoreCopy)
1278	LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);
1279
1280	assert(!MFI->SGPRForBPSaveRestoreCopy &&(static_cast<void> (0))
1281	!MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP")(static_cast<void> (0));
1282	getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
1283	MFI->BasePointerSaveIndex, false);
1284	}
1285	}
1286
1287	void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
1288	BitVector &SavedRegs,
1289	RegScavenger *RS) const {
1290	TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1291	const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1292	if (MFI->isEntryFunction())
1293	return;
1294
1295	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1296	const SIRegisterInfo *TRI = ST.getRegisterInfo();
1297
1298	// The SP is specifically managed and we don't want extra spills of it.
1299	SavedRegs.reset(MFI->getStackPtrOffsetReg());
1300
1301	const BitVector AllSavedRegs = SavedRegs;
1302	SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1303
1304	// If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
1305	const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;
1306
1307	// We have to anticipate introducing CSR VGPR spills or spill of caller
1308	// save VGPR reserved for SGPR spills as we now always create stack entry
1309	// for it, if we don't have any stack objects already, since we require
1310	// an FP if there is a call and stack.
1311	MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1312	const bool WillHaveFP =
1313	FrameInfo.hasCalls() && (HaveAnyCSRVGPR \|\| MFI->VGPRReservedForSGPRSpill);
1314
1315	// FP will be specially managed like SP.
1316	if (WillHaveFP \|\| hasFP(MF))
1317	SavedRegs.reset(MFI->getFrameOffsetReg());
1318	}
1319
1320	bool SIFrameLowering::assignCalleeSavedSpillSlots(
1321	MachineFunction &MF, const TargetRegisterInfo *TRI,
1322	std::vector<CalleeSavedInfo> &CSI) const {
1323	if (CSI.empty())
1324	return true; // Early exit if no callee saved registers are modified!
1325
1326	const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1327	if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
1328	!FuncInfo->SGPRForBPSaveRestoreCopy)
1329	return false;
1330
1331	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1332	const SIRegisterInfo *RI = ST.getRegisterInfo();
1333	Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1334	Register BasePtrReg = RI->getBaseRegister();
1335	unsigned NumModifiedRegs = 0;
1336
1337	if (FuncInfo->SGPRForFPSaveRestoreCopy)
1338	NumModifiedRegs++;
1339	if (FuncInfo->SGPRForBPSaveRestoreCopy)
1340	NumModifiedRegs++;
1341
1342	for (auto &CS : CSI) {
1343	if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
1344	CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
1345	if (--NumModifiedRegs)
1346	break;
1347	} else if (CS.getReg() == BasePtrReg &&
1348	FuncInfo->SGPRForBPSaveRestoreCopy) {
1349	CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
1350	if (--NumModifiedRegs)
1351	break;
1352	}
1353	}
1354
1355	return false;
1356	}
1357
1358	MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
1359	MachineFunction &MF,
1360	MachineBasicBlock &MBB,
1361	MachineBasicBlock::iterator I) const {
1362	int64_t Amount = I->getOperand(0).getImm();
1363	if (Amount == 0)
1364	return MBB.erase(I);
1365
1366	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1367	const SIInstrInfo *TII = ST.getInstrInfo();
1368	const DebugLoc &DL = I->getDebugLoc();
1369	unsigned Opc = I->getOpcode();
1370	bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
1371	uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
1372
1373	if (!hasReservedCallFrame(MF)) {
1374	Amount = alignTo(Amount, getStackAlign());
1375	assert(isUInt<32>(Amount) && "exceeded stack address space size")(static_cast<void> (0));
1376	const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1377	Register SPReg = MFI->getStackPtrOffsetReg();
1378
1379	Amount *= getScratchScaleFactor(ST);
1380	if (IsDestroy)
1381	Amount = -Amount;
1382	BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
1383	.addReg(SPReg)
1384	.addImm(Amount);
1385	} else if (CalleePopAmount != 0) {
1386	llvm_unreachable("is this used?")__builtin_unreachable();
1387	}
1388
1389	return MBB.erase(I);
1390	}
1391
1392	/// Returns true if the frame will require a reference to the stack pointer.
1393	///
1394	/// This is the set of conditions common to setting up the stack pointer in a
1395	/// kernel, and for using a frame pointer in a callable function.
1396	///
1397	/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
1398	/// references SP.
1399	static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
1400	return MFI.hasVarSizedObjects() \|\| MFI.hasStackMap() \|\| MFI.hasPatchPoint();
1401	}
1402
1403	// The FP for kernels is always known 0, so we never really need to setup an
1404	// explicit register for it. However, DisableFramePointerElim will force us to
1405	// use a register for it.
1406	bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
1407	const MachineFrameInfo &MFI = MF.getFrameInfo();
1408
1409	// For entry functions we can use an immediate offset in most cases, so the
1410	// presence of calls doesn't imply we need a distinct frame pointer.
1411	if (MFI.hasCalls() &&
1412	!MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
1413	// All offsets are unsigned, so need to be addressed in the same direction
1414	// as stack growth.
1415
1416	// FIXME: This function is pretty broken, since it can be called before the
1417	// frame layout is determined or CSR spills are inserted.
1418	return MFI.getStackSize() != 0;
1419	}
1420
1421	return frameTriviallyRequiresSP(MFI) \|\| MFI.isFrameAddressTaken() \|\|
1422	MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
1423	MF) \|\|
1424	MF.getTarget().Options.DisableFramePointerElim(MF);
1425	}
1426
1427	// This is essentially a reduced version of hasFP for entry functions. Since the
1428	// stack pointer is known 0 on entry to kernels, we never really need an FP
1429	// register. We may need to initialize the stack pointer depending on the frame
1430	// properties, which logically overlaps many of the cases where an ordinary
1431	// function would require an FP.
1432	bool SIFrameLowering::requiresStackPointerReference(
1433	const MachineFunction &MF) const {
1434	// Callable functions always require a stack pointer reference.
1435	assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&(static_cast<void> (0))
1436	"only expected to call this for entry points")(static_cast<void> (0));
1437
1438	const MachineFrameInfo &MFI = MF.getFrameInfo();
1439
1440	// Entry points ordinarily don't need to initialize SP. We have to set it up
1441	// for callees if there are any. Also note tail calls are impossible/don't
1442	// make any sense for kernels.
1443	if (MFI.hasCalls())
1444	return true;
1445
1446	// We still need to initialize the SP if we're doing anything weird that
1447	// references the SP, like variable sized stack objects.
1448	return frameTriviallyRequiresSP(MFI);
1449	}