File: | llvm/lib/Target/PowerPC/PPCFrameLowering.cpp |
Warning: | line 1600, column 8 Value stored to 'SingleScratchReg' during its initialization is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the PPC implementation of TargetFrameLowering class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "MCTargetDesc/PPCPredicates.h" |
14 | #include "PPCFrameLowering.h" |
15 | #include "PPCInstrBuilder.h" |
16 | #include "PPCInstrInfo.h" |
17 | #include "PPCMachineFunctionInfo.h" |
18 | #include "PPCSubtarget.h" |
19 | #include "PPCTargetMachine.h" |
20 | #include "llvm/ADT/Statistic.h" |
21 | #include "llvm/CodeGen/MachineFrameInfo.h" |
22 | #include "llvm/CodeGen/MachineFunction.h" |
23 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
24 | #include "llvm/CodeGen/MachineModuleInfo.h" |
25 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
26 | #include "llvm/CodeGen/RegisterScavenging.h" |
27 | #include "llvm/IR/Function.h" |
28 | #include "llvm/Target/TargetOptions.h" |
29 | |
30 | using namespace llvm; |
31 | |
32 | #define DEBUG_TYPE"framelowering" "framelowering" |
33 | STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue")static llvm::Statistic NumPESpillVSR = {"framelowering", "NumPESpillVSR" , "Number of spills to vector in prologue"}; |
34 | STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue")static llvm::Statistic NumPEReloadVSR = {"framelowering", "NumPEReloadVSR" , "Number of reloads from vector in epilogue"}; |
35 | STATISTIC(NumPrologProbed, "Number of prologues probed")static llvm::Statistic NumPrologProbed = {"framelowering", "NumPrologProbed" , "Number of prologues probed"}; |
36 | |
37 | static cl::opt<bool> |
38 | EnablePEVectorSpills("ppc-enable-pe-vector-spills", |
39 | cl::desc("Enable spills in prologue to vector registers."), |
40 | cl::init(false), cl::Hidden); |
41 | |
42 | static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { |
43 | if (STI.isAIXABI()) |
44 | return STI.isPPC64() ? 16 : 8; |
45 | // SVR4 ABI: |
46 | return STI.isPPC64() ? 16 : 4; |
47 | } |
48 | |
49 | static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { |
50 | if (STI.isAIXABI()) |
51 | return STI.isPPC64() ? 40 : 20; |
52 | return STI.isELFv2ABI() ? 24 : 40; |
53 | } |
54 | |
55 | static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { |
56 | // First slot in the general register save area. |
57 | return STI.isPPC64() ? -8U : -4U; |
58 | } |
59 | |
60 | static unsigned computeLinkageSize(const PPCSubtarget &STI) { |
61 | if (STI.isAIXABI() || STI.isPPC64()) |
62 | return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); |
63 | |
64 | // 32-bit SVR4 ABI: |
65 | return 8; |
66 | } |
67 | |
68 | static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { |
69 | // Third slot in the general purpose register save area. |
70 | if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent()) |
71 | return -12U; |
72 | |
73 | // Second slot in the general purpose register save area. |
74 | return STI.isPPC64() ? -16U : -8U; |
75 | } |
76 | |
77 | static unsigned computeCRSaveOffset(const PPCSubtarget &STI) { |
78 | return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8; |
79 | } |
80 | |
81 | PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) |
82 | : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, |
83 | STI.getPlatformStackAlignment(), 0), |
84 | Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), |
85 | TOCSaveOffset(computeTOCSaveOffset(Subtarget)), |
86 | FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), |
87 | LinkageSize(computeLinkageSize(Subtarget)), |
88 | BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)), |
89 | CRSaveOffset(computeCRSaveOffset(Subtarget)) {} |
90 | |
91 | // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. |
92 | const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( |
93 | unsigned &NumEntries) const { |
94 | |
95 | // Floating-point register save area offsets. |
96 | #define CALLEE_SAVED_FPRS{PPC::F31, -8}, {PPC::F30, -16}, {PPC::F29, -24}, {PPC::F28, - 32}, {PPC::F27, -40}, {PPC::F26, -48}, {PPC::F25, -56}, {PPC:: F24, -64}, {PPC::F23, -72}, {PPC::F22, -80}, {PPC::F21, -88}, {PPC::F20, -96}, {PPC::F19, -104}, {PPC::F18, -112}, {PPC::F17 , -120}, {PPC::F16, -128}, {PPC::F15, -136}, {PPC::F14, -144} \ |
97 | {PPC::F31, -8}, \ |
98 | {PPC::F30, -16}, \ |
99 | {PPC::F29, -24}, \ |
100 | {PPC::F28, -32}, \ |
101 | {PPC::F27, -40}, \ |
102 | {PPC::F26, -48}, \ |
103 | {PPC::F25, -56}, \ |
104 | {PPC::F24, -64}, \ |
105 | {PPC::F23, -72}, \ |
106 | {PPC::F22, -80}, \ |
107 | {PPC::F21, -88}, \ |
108 | {PPC::F20, -96}, \ |
109 | {PPC::F19, -104}, \ |
110 | {PPC::F18, -112}, \ |
111 | {PPC::F17, -120}, \ |
112 | {PPC::F16, -128}, \ |
113 | {PPC::F15, -136}, \ |
114 | {PPC::F14, -144} |
115 | |
116 | // 32-bit general purpose register save area offsets shared by ELF and |
117 | // AIX. AIX has an extra CSR with r13. |
118 | #define CALLEE_SAVED_GPRS32{PPC::R31, -4}, {PPC::R30, -8}, {PPC::R29, -12}, {PPC::R28, - 16}, {PPC::R27, -20}, {PPC::R26, -24}, {PPC::R25, -28}, {PPC:: R24, -32}, {PPC::R23, -36}, {PPC::R22, -40}, {PPC::R21, -44}, {PPC::R20, -48}, {PPC::R19, -52}, {PPC::R18, -56}, {PPC::R17 , -60}, {PPC::R16, -64}, {PPC::R15, -68}, {PPC::R14, -72} \ |
119 | {PPC::R31, -4}, \ |
120 | {PPC::R30, -8}, \ |
121 | {PPC::R29, -12}, \ |
122 | {PPC::R28, -16}, \ |
123 | {PPC::R27, -20}, \ |
124 | {PPC::R26, -24}, \ |
125 | {PPC::R25, -28}, \ |
126 | {PPC::R24, -32}, \ |
127 | {PPC::R23, -36}, \ |
128 | {PPC::R22, -40}, \ |
129 | {PPC::R21, -44}, \ |
130 | {PPC::R20, -48}, \ |
131 | {PPC::R19, -52}, \ |
132 | {PPC::R18, -56}, \ |
133 | {PPC::R17, -60}, \ |
134 | {PPC::R16, -64}, \ |
135 | {PPC::R15, -68}, \ |
136 | {PPC::R14, -72} |
137 | |
138 | // 64-bit general purpose register save area offsets. |
139 | #define CALLEE_SAVED_GPRS64{PPC::X31, -8}, {PPC::X30, -16}, {PPC::X29, -24}, {PPC::X28, - 32}, {PPC::X27, -40}, {PPC::X26, -48}, {PPC::X25, -56}, {PPC:: X24, -64}, {PPC::X23, -72}, {PPC::X22, -80}, {PPC::X21, -88}, {PPC::X20, -96}, {PPC::X19, -104}, {PPC::X18, -112}, {PPC::X17 , -120}, {PPC::X16, -128}, {PPC::X15, -136}, {PPC::X14, -144} \ |
140 | {PPC::X31, -8}, \ |
141 | {PPC::X30, -16}, \ |
142 | {PPC::X29, -24}, \ |
143 | {PPC::X28, -32}, \ |
144 | {PPC::X27, -40}, \ |
145 | {PPC::X26, -48}, \ |
146 | {PPC::X25, -56}, \ |
147 | {PPC::X24, -64}, \ |
148 | {PPC::X23, -72}, \ |
149 | {PPC::X22, -80}, \ |
150 | {PPC::X21, -88}, \ |
151 | {PPC::X20, -96}, \ |
152 | {PPC::X19, -104}, \ |
153 | {PPC::X18, -112}, \ |
154 | {PPC::X17, -120}, \ |
155 | {PPC::X16, -128}, \ |
156 | {PPC::X15, -136}, \ |
157 | {PPC::X14, -144} |
158 | |
159 | // Vector register save area offsets. |
160 | #define CALLEE_SAVED_VRS{PPC::V31, -16}, {PPC::V30, -32}, {PPC::V29, -48}, {PPC::V28, -64}, {PPC::V27, -80}, {PPC::V26, -96}, {PPC::V25, -112}, {PPC ::V24, -128}, {PPC::V23, -144}, {PPC::V22, -160}, {PPC::V21, - 176}, {PPC::V20, -192} \ |
161 | {PPC::V31, -16}, \ |
162 | {PPC::V30, -32}, \ |
163 | {PPC::V29, -48}, \ |
164 | {PPC::V28, -64}, \ |
165 | {PPC::V27, -80}, \ |
166 | {PPC::V26, -96}, \ |
167 | {PPC::V25, -112}, \ |
168 | {PPC::V24, -128}, \ |
169 | {PPC::V23, -144}, \ |
170 | {PPC::V22, -160}, \ |
171 | {PPC::V21, -176}, \ |
172 | {PPC::V20, -192} |
173 | |
174 | // Note that the offsets here overlap, but this is fixed up in |
175 | // processFunctionBeforeFrameFinalized. |
176 | |
177 | static const SpillSlot ELFOffsets32[] = { |
178 | CALLEE_SAVED_FPRS{PPC::F31, -8}, {PPC::F30, -16}, {PPC::F29, -24}, {PPC::F28, - 32}, {PPC::F27, -40}, {PPC::F26, -48}, {PPC::F25, -56}, {PPC:: F24, -64}, {PPC::F23, -72}, {PPC::F22, -80}, {PPC::F21, -88}, {PPC::F20, -96}, {PPC::F19, -104}, {PPC::F18, -112}, {PPC::F17 , -120}, {PPC::F16, -128}, {PPC::F15, -136}, {PPC::F14, -144}, |
179 | CALLEE_SAVED_GPRS32{PPC::R31, -4}, {PPC::R30, -8}, {PPC::R29, -12}, {PPC::R28, - 16}, {PPC::R27, -20}, {PPC::R26, -24}, {PPC::R25, -28}, {PPC:: R24, -32}, {PPC::R23, -36}, {PPC::R22, -40}, {PPC::R21, -44}, {PPC::R20, -48}, {PPC::R19, -52}, {PPC::R18, -56}, {PPC::R17 , -60}, {PPC::R16, -64}, {PPC::R15, -68}, {PPC::R14, -72}, |
180 | |
181 | // CR save area offset. We map each of the nonvolatile CR fields |
182 | // to the slot for CR2, which is the first of the nonvolatile CR |
183 | // fields to be assigned, so that we only allocate one save slot. |
184 | // See PPCRegisterInfo::hasReservedSpillSlot() for more information. |
185 | {PPC::CR2, -4}, |
186 | |
187 | // VRSAVE save area offset. |
188 | {PPC::VRSAVE, -4}, |
189 | |
190 | CALLEE_SAVED_VRS{PPC::V31, -16}, {PPC::V30, -32}, {PPC::V29, -48}, {PPC::V28, -64}, {PPC::V27, -80}, {PPC::V26, -96}, {PPC::V25, -112}, {PPC ::V24, -128}, {PPC::V23, -144}, {PPC::V22, -160}, {PPC::V21, - 176}, {PPC::V20, -192}, |
191 | |
192 | // SPE register save area (overlaps Vector save area). |
193 | {PPC::S31, -8}, |
194 | {PPC::S30, -16}, |
195 | {PPC::S29, -24}, |
196 | {PPC::S28, -32}, |
197 | {PPC::S27, -40}, |
198 | {PPC::S26, -48}, |
199 | {PPC::S25, -56}, |
200 | {PPC::S24, -64}, |
201 | {PPC::S23, -72}, |
202 | {PPC::S22, -80}, |
203 | {PPC::S21, -88}, |
204 | {PPC::S20, -96}, |
205 | {PPC::S19, -104}, |
206 | {PPC::S18, -112}, |
207 | {PPC::S17, -120}, |
208 | {PPC::S16, -128}, |
209 | {PPC::S15, -136}, |
210 | {PPC::S14, -144}}; |
211 | |
212 | static const SpillSlot ELFOffsets64[] = { |
213 | CALLEE_SAVED_FPRS{PPC::F31, -8}, {PPC::F30, -16}, {PPC::F29, -24}, {PPC::F28, - 32}, {PPC::F27, -40}, {PPC::F26, -48}, {PPC::F25, -56}, {PPC:: F24, -64}, {PPC::F23, -72}, {PPC::F22, -80}, {PPC::F21, -88}, {PPC::F20, -96}, {PPC::F19, -104}, {PPC::F18, -112}, {PPC::F17 , -120}, {PPC::F16, -128}, {PPC::F15, -136}, {PPC::F14, -144}, |
214 | CALLEE_SAVED_GPRS64{PPC::X31, -8}, {PPC::X30, -16}, {PPC::X29, -24}, {PPC::X28, - 32}, {PPC::X27, -40}, {PPC::X26, -48}, {PPC::X25, -56}, {PPC:: X24, -64}, {PPC::X23, -72}, {PPC::X22, -80}, {PPC::X21, -88}, {PPC::X20, -96}, {PPC::X19, -104}, {PPC::X18, -112}, {PPC::X17 , -120}, {PPC::X16, -128}, {PPC::X15, -136}, {PPC::X14, -144}, |
215 | |
216 | // VRSAVE save area offset. |
217 | {PPC::VRSAVE, -4}, |
218 | CALLEE_SAVED_VRS{PPC::V31, -16}, {PPC::V30, -32}, {PPC::V29, -48}, {PPC::V28, -64}, {PPC::V27, -80}, {PPC::V26, -96}, {PPC::V25, -112}, {PPC ::V24, -128}, {PPC::V23, -144}, {PPC::V22, -160}, {PPC::V21, - 176}, {PPC::V20, -192} |
219 | }; |
220 | |
221 | static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS{PPC::F31, -8}, {PPC::F30, -16}, {PPC::F29, -24}, {PPC::F28, - 32}, {PPC::F27, -40}, {PPC::F26, -48}, {PPC::F25, -56}, {PPC:: F24, -64}, {PPC::F23, -72}, {PPC::F22, -80}, {PPC::F21, -88}, {PPC::F20, -96}, {PPC::F19, -104}, {PPC::F18, -112}, {PPC::F17 , -120}, {PPC::F16, -128}, {PPC::F15, -136}, {PPC::F14, -144}, |
222 | CALLEE_SAVED_GPRS32{PPC::R31, -4}, {PPC::R30, -8}, {PPC::R29, -12}, {PPC::R28, - 16}, {PPC::R27, -20}, {PPC::R26, -24}, {PPC::R25, -28}, {PPC:: R24, -32}, {PPC::R23, -36}, {PPC::R22, -40}, {PPC::R21, -44}, {PPC::R20, -48}, {PPC::R19, -52}, {PPC::R18, -56}, {PPC::R17 , -60}, {PPC::R16, -64}, {PPC::R15, -68}, {PPC::R14, -72}, |
223 | // Add AIX's extra CSR. |
224 | {PPC::R13, -76}, |
225 | CALLEE_SAVED_VRS{PPC::V31, -16}, {PPC::V30, -32}, {PPC::V29, -48}, {PPC::V28, -64}, {PPC::V27, -80}, {PPC::V26, -96}, {PPC::V25, -112}, {PPC ::V24, -128}, {PPC::V23, -144}, {PPC::V22, -160}, {PPC::V21, - 176}, {PPC::V20, -192}}; |
226 | |
227 | static const SpillSlot AIXOffsets64[] = { |
228 | CALLEE_SAVED_FPRS{PPC::F31, -8}, {PPC::F30, -16}, {PPC::F29, -24}, {PPC::F28, - 32}, {PPC::F27, -40}, {PPC::F26, -48}, {PPC::F25, -56}, {PPC:: F24, -64}, {PPC::F23, -72}, {PPC::F22, -80}, {PPC::F21, -88}, {PPC::F20, -96}, {PPC::F19, -104}, {PPC::F18, -112}, {PPC::F17 , -120}, {PPC::F16, -128}, {PPC::F15, -136}, {PPC::F14, -144}, CALLEE_SAVED_GPRS64{PPC::X31, -8}, {PPC::X30, -16}, {PPC::X29, -24}, {PPC::X28, - 32}, {PPC::X27, -40}, {PPC::X26, -48}, {PPC::X25, -56}, {PPC:: X24, -64}, {PPC::X23, -72}, {PPC::X22, -80}, {PPC::X21, -88}, {PPC::X20, -96}, {PPC::X19, -104}, {PPC::X18, -112}, {PPC::X17 , -120}, {PPC::X16, -128}, {PPC::X15, -136}, {PPC::X14, -144}, CALLEE_SAVED_VRS{PPC::V31, -16}, {PPC::V30, -32}, {PPC::V29, -48}, {PPC::V28, -64}, {PPC::V27, -80}, {PPC::V26, -96}, {PPC::V25, -112}, {PPC ::V24, -128}, {PPC::V23, -144}, {PPC::V22, -160}, {PPC::V21, - 176}, {PPC::V20, -192}}; |
229 | |
230 | if (Subtarget.is64BitELFABI()) { |
231 | NumEntries = array_lengthof(ELFOffsets64); |
232 | return ELFOffsets64; |
233 | } |
234 | |
235 | if (Subtarget.is32BitELFABI()) { |
236 | NumEntries = array_lengthof(ELFOffsets32); |
237 | return ELFOffsets32; |
238 | } |
239 | |
240 | assert(Subtarget.isAIXABI() && "Unexpected ABI.")(static_cast<void> (0)); |
241 | |
242 | if (Subtarget.isPPC64()) { |
243 | NumEntries = array_lengthof(AIXOffsets64); |
244 | return AIXOffsets64; |
245 | } |
246 | |
247 | NumEntries = array_lengthof(AIXOffsets32); |
248 | return AIXOffsets32; |
249 | } |
250 | |
251 | static bool spillsCR(const MachineFunction &MF) { |
252 | const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
253 | return FuncInfo->isCRSpilled(); |
254 | } |
255 | |
256 | static bool hasSpills(const MachineFunction &MF) { |
257 | const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
258 | return FuncInfo->hasSpills(); |
259 | } |
260 | |
261 | static bool hasNonRISpills(const MachineFunction &MF) { |
262 | const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
263 | return FuncInfo->hasNonRISpills(); |
264 | } |
265 | |
266 | /// MustSaveLR - Return true if this function requires that we save the LR |
267 | /// register onto the stack in the prolog and restore it in the epilog of the |
268 | /// function. |
269 | static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { |
270 | const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); |
271 | |
272 | // We need a save/restore of LR if there is any def of LR (which is |
273 | // defined by calls, including the PIC setup sequence), or if there is |
274 | // some use of the LR stack slot (e.g. for builtin_return_address). |
275 | // (LR comes in 32 and 64 bit versions.) |
276 | MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); |
277 | return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); |
278 | } |
279 | |
280 | /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum |
281 | /// call frame size. Update the MachineFunction object with the stack size. |
282 | uint64_t |
283 | PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, |
284 | bool UseEstimate) const { |
285 | unsigned NewMaxCallFrameSize = 0; |
286 | uint64_t FrameSize = determineFrameLayout(MF, UseEstimate, |
287 | &NewMaxCallFrameSize); |
288 | MF.getFrameInfo().setStackSize(FrameSize); |
289 | MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); |
290 | return FrameSize; |
291 | } |
292 | |
293 | /// determineFrameLayout - Determine the size of the frame and maximum call |
294 | /// frame size. |
295 | uint64_t |
296 | PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, |
297 | bool UseEstimate, |
298 | unsigned *NewMaxCallFrameSize) const { |
299 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
300 | const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
301 | |
302 | // Get the number of bytes to allocate from the FrameInfo |
303 | uint64_t FrameSize = |
304 | UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); |
305 | |
306 | // Get stack alignments. The frame must be aligned to the greatest of these: |
307 | Align TargetAlign = getStackAlign(); // alignment required per the ABI |
308 | Align MaxAlign = MFI.getMaxAlign(); // algmt required by data in frame |
309 | Align Alignment = std::max(TargetAlign, MaxAlign); |
310 | |
311 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
312 | |
313 | unsigned LR = RegInfo->getRARegister(); |
314 | bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); |
315 | bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. |
316 | !MFI.adjustsStack() && // No calls. |
317 | !MustSaveLR(MF, LR) && // No need to save LR. |
318 | !FI->mustSaveTOC() && // No need to save TOC. |
319 | !RegInfo->hasBasePointer(MF); // No special alignment. |
320 | |
321 | // Note: for PPC32 SVR4ABI, we can still generate stackless |
322 | // code if all local vars are reg-allocated. |
323 | bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); |
324 | |
325 | // Check whether we can skip adjusting the stack pointer (by using red zone) |
326 | if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { |
327 | // No need for frame |
328 | return 0; |
329 | } |
330 | |
331 | // Get the maximum call frame size of all the calls. |
332 | unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); |
333 | |
334 | // Maximum call frame needs to be at least big enough for linkage area. |
335 | unsigned minCallFrameSize = getLinkageSize(); |
336 | maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); |
337 | |
338 | // If we have dynamic alloca then maxCallFrameSize needs to be aligned so |
339 | // that allocations will be aligned. |
340 | if (MFI.hasVarSizedObjects()) |
341 | maxCallFrameSize = alignTo(maxCallFrameSize, Alignment); |
342 | |
343 | // Update the new max call frame size if the caller passes in a valid pointer. |
344 | if (NewMaxCallFrameSize) |
345 | *NewMaxCallFrameSize = maxCallFrameSize; |
346 | |
347 | // Include call frame size in total. |
348 | FrameSize += maxCallFrameSize; |
349 | |
350 | // Make sure the frame is aligned. |
351 | FrameSize = alignTo(FrameSize, Alignment); |
352 | |
353 | return FrameSize; |
354 | } |
355 | |
356 | // hasFP - Return true if the specified function actually has a dedicated frame |
357 | // pointer register. |
358 | bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { |
359 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
360 | // FIXME: This is pretty much broken by design: hasFP() might be called really |
361 | // early, before the stack layout was calculated and thus hasFP() might return |
362 | // true or false here depending on the time of call. |
363 | return (MFI.getStackSize()) && needsFP(MF); |
364 | } |
365 | |
366 | // needsFP - Return true if the specified function should have a dedicated frame |
367 | // pointer register. This is true if the function has variable sized allocas or |
368 | // if frame pointer elimination is disabled. |
369 | bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { |
370 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
371 | |
372 | // Naked functions have no stack frame pushed, so we don't have a frame |
373 | // pointer. |
374 | if (MF.getFunction().hasFnAttribute(Attribute::Naked)) |
375 | return false; |
376 | |
377 | return MF.getTarget().Options.DisableFramePointerElim(MF) || |
378 | MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || |
379 | MF.exposesReturnsTwice() || |
380 | (MF.getTarget().Options.GuaranteedTailCallOpt && |
381 | MF.getInfo<PPCFunctionInfo>()->hasFastCall()); |
382 | } |
383 | |
384 | void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { |
385 | bool is31 = needsFP(MF); |
386 | unsigned FPReg = is31 ? PPC::R31 : PPC::R1; |
387 | unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; |
388 | |
389 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
390 | bool HasBP = RegInfo->hasBasePointer(MF); |
391 | unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; |
392 | unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; |
393 | |
394 | for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); |
395 | BI != BE; ++BI) |
396 | for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { |
397 | --MBBI; |
398 | for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { |
399 | MachineOperand &MO = MBBI->getOperand(I); |
400 | if (!MO.isReg()) |
401 | continue; |
402 | |
403 | switch (MO.getReg()) { |
404 | case PPC::FP: |
405 | MO.setReg(FPReg); |
406 | break; |
407 | case PPC::FP8: |
408 | MO.setReg(FP8Reg); |
409 | break; |
410 | case PPC::BP: |
411 | MO.setReg(BPReg); |
412 | break; |
413 | case PPC::BP8: |
414 | MO.setReg(BP8Reg); |
415 | break; |
416 | |
417 | } |
418 | } |
419 | } |
420 | } |
421 | |
422 | /* This function will do the following: |
423 | - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 |
424 | respectively (defaults recommended by the ABI) and return true |
425 | - If MBB is not an entry block, initialize the register scavenger and look |
426 | for available registers. |
427 | - If the defaults (R0/R12) are available, return true |
428 | - If TwoUniqueRegsRequired is set to true, it looks for two unique |
429 | registers. Otherwise, look for a single available register. |
430 | - If the required registers are found, set SR1 and SR2 and return true. |
431 | - If the required registers are not found, set SR2 or both SR1 and SR2 to |
432 | PPC::NoRegister and return false. |
433 | |
434 | Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired |
435 | is not set, this function will attempt to find two different registers, but |
436 | still return true if only one register is available (and set SR1 == SR2). |
437 | */ |
438 | bool |
439 | PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, |
440 | bool UseAtEnd, |
441 | bool TwoUniqueRegsRequired, |
442 | Register *SR1, |
443 | Register *SR2) const { |
444 | RegScavenger RS; |
445 | Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; |
446 | Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; |
447 | |
448 | // Set the defaults for the two scratch registers. |
449 | if (SR1) |
450 | *SR1 = R0; |
451 | |
452 | if (SR2) { |
453 | assert (SR1 && "Asking for the second scratch register but not the first?")(static_cast<void> (0)); |
454 | *SR2 = R12; |
455 | } |
456 | |
457 | // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. |
458 | if ((UseAtEnd && MBB->isReturnBlock()) || |
459 | (!UseAtEnd && (&MBB->getParent()->front() == MBB))) |
460 | return true; |
461 | |
462 | RS.enterBasicBlock(*MBB); |
463 | |
464 | if (UseAtEnd && !MBB->empty()) { |
465 | // The scratch register will be used at the end of the block, so must |
466 | // consider all registers used within the block |
467 | |
468 | MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); |
469 | // If no terminator, back iterator up to previous instruction. |
470 | if (MBBI == MBB->end()) |
471 | MBBI = std::prev(MBBI); |
472 | |
473 | if (MBBI != MBB->begin()) |
474 | RS.forward(MBBI); |
475 | } |
476 | |
477 | // If the two registers are available, we're all good. |
478 | // Note that we only return here if both R0 and R12 are available because |
479 | // although the function may not require two unique registers, it may benefit |
480 | // from having two so we should try to provide them. |
481 | if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) |
482 | return true; |
483 | |
484 | // Get the list of callee-saved registers for the target. |
485 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
486 | const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); |
487 | |
488 | // Get all the available registers in the block. |
489 | BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : |
490 | &PPC::GPRCRegClass); |
491 | |
492 | // We shouldn't use callee-saved registers as scratch registers as they may be |
493 | // available when looking for a candidate block for shrink wrapping but not |
494 | // available when the actual prologue/epilogue is being emitted because they |
495 | // were added as live-in to the prologue block by PrologueEpilogueInserter. |
496 | for (int i = 0; CSRegs[i]; ++i) |
497 | BV.reset(CSRegs[i]); |
498 | |
499 | // Set the first scratch register to the first available one. |
500 | if (SR1) { |
501 | int FirstScratchReg = BV.find_first(); |
502 | *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; |
503 | } |
504 | |
505 | // If there is another one available, set the second scratch register to that. |
506 | // Otherwise, set it to either PPC::NoRegister if this function requires two |
507 | // or to whatever SR1 is set to if this function doesn't require two. |
508 | if (SR2) { |
509 | int SecondScratchReg = BV.find_next(*SR1); |
510 | if (SecondScratchReg != -1) |
511 | *SR2 = SecondScratchReg; |
512 | else |
513 | *SR2 = TwoUniqueRegsRequired ? Register() : *SR1; |
514 | } |
515 | |
516 | // Now that we've done our best to provide both registers, double check |
517 | // whether we were unable to provide enough. |
518 | if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U)) |
519 | return false; |
520 | |
521 | return true; |
522 | } |
523 | |
524 | // We need a scratch register for spilling LR and for spilling CR. By default, |
525 | // we use two scratch registers to hide latency. However, if only one scratch |
526 | // register is available, we can adjust for that by not overlapping the spill |
527 | // code. However, if we need to realign the stack (i.e. have a base pointer) |
528 | // and the stack frame is large, we need two scratch registers. |
529 | // Also, stack probe requires two scratch registers, one for old sp, one for |
530 | // large frame and large probe size. |
531 | bool |
532 | PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { |
533 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
534 | MachineFunction &MF = *(MBB->getParent()); |
535 | bool HasBP = RegInfo->hasBasePointer(MF); |
536 | unsigned FrameSize = determineFrameLayout(MF); |
537 | int NegFrameSize = -FrameSize; |
538 | bool IsLargeFrame = !isInt<16>(NegFrameSize); |
539 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
540 | Align MaxAlign = MFI.getMaxAlign(); |
541 | bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); |
542 | const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); |
543 | |
544 | return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) || |
545 | TLI.hasInlineStackProbe(MF); |
546 | } |
547 | |
548 | bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { |
549 | MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); |
550 | |
551 | return findScratchRegister(TmpMBB, false, |
552 | twoUniqueScratchRegsRequired(TmpMBB)); |
553 | } |
554 | |
555 | bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { |
556 | MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); |
557 | |
558 | return findScratchRegister(TmpMBB, true); |
559 | } |
560 | |
561 | bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { |
562 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
563 | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
564 | |
565 | // Abort if there is no register info or function info. |
566 | if (!RegInfo || !FI) |
567 | return false; |
568 | |
569 | // Only move the stack update on ELFv2 ABI and PPC64. |
570 | if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()) |
571 | return false; |
572 | |
573 | // Check the frame size first and return false if it does not fit the |
574 | // requirements. |
575 | // We need a non-zero frame size as well as a frame that will fit in the red |
576 | // zone. This is because by moving the stack pointer update we are now storing |
577 | // to the red zone until the stack pointer is updated. If we get an interrupt |
578 | // inside the prologue but before the stack update we now have a number of |
579 | // stores to the red zone and those stores must all fit. |
580 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
581 | unsigned FrameSize = MFI.getStackSize(); |
582 | if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) |
583 | return false; |
584 | |
585 | // Frame pointers and base pointers complicate matters so don't do anything |
586 | // if we have them. For example having a frame pointer will sometimes require |
587 | // a copy of r1 into r31 and that makes keeping track of updates to r1 more |
588 | // difficult. Similar situation exists with setjmp. |
589 | if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice()) |
590 | return false; |
591 | |
592 | // Calls to fast_cc functions use different rules for passing parameters on |
593 | // the stack from the ABI and using PIC base in the function imposes |
594 | // similar restrictions to using the base pointer. It is not generally safe |
595 | // to move the stack pointer update in these situations. |
596 | if (FI->hasFastCall() || FI->usesPICBase()) |
597 | return false; |
598 | |
599 | // Finally we can move the stack update if we do not require register |
600 | // scavenging. Register scavenging can introduce more spills and so |
601 | // may make the frame size larger than we have computed. |
602 | return !RegInfo->requiresFrameIndexScavenging(MF); |
603 | } |
604 | |
605 | void PPCFrameLowering::emitPrologue(MachineFunction &MF, |
606 | MachineBasicBlock &MBB) const { |
607 | MachineBasicBlock::iterator MBBI = MBB.begin(); |
608 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
609 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
610 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
611 | const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); |
612 | |
613 | MachineModuleInfo &MMI = MF.getMMI(); |
614 | const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); |
615 | DebugLoc dl; |
616 | // AIX assembler does not support cfi directives. |
617 | const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); |
618 | |
619 | // Get processor type. |
620 | bool isPPC64 = Subtarget.isPPC64(); |
621 | // Get the ABI. |
622 | bool isSVR4ABI = Subtarget.isSVR4ABI(); |
623 | bool isELFv2ABI = Subtarget.isELFv2ABI(); |
624 | assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.")(static_cast<void> (0)); |
625 | |
626 | // Work out frame sizes. |
627 | uint64_t FrameSize = determineFrameLayoutAndUpdate(MF); |
628 | int64_t NegFrameSize = -FrameSize; |
629 | if (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)) |
630 | llvm_unreachable("Unhandled stack size!")__builtin_unreachable(); |
631 | |
632 | if (MFI.isFrameAddressTaken()) |
633 | replaceFPWithRealFP(MF); |
634 | |
635 | // Check if the link register (LR) must be saved. |
636 | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
637 | bool MustSaveLR = FI->mustSaveLR(); |
638 | bool MustSaveTOC = FI->mustSaveTOC(); |
639 | const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); |
640 | bool MustSaveCR = !MustSaveCRs.empty(); |
641 | // Do we have a frame pointer and/or base pointer for this function? |
642 | bool HasFP = hasFP(MF); |
643 | bool HasBP = RegInfo->hasBasePointer(MF); |
644 | bool HasRedZone = isPPC64 || !isSVR4ABI; |
645 | bool HasROPProtect = Subtarget.hasROPProtect(); |
646 | bool HasPrivileged = Subtarget.hasPrivileged(); |
647 | |
648 | Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; |
649 | Register BPReg = RegInfo->getBaseRegister(MF); |
650 | Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; |
651 | Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR; |
652 | Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2; |
653 | Register ScratchReg; |
654 | Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg |
655 | // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) |
656 | const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 |
657 | : PPC::MFLR ); |
658 | const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD |
659 | : PPC::STW ); |
660 | const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU |
661 | : PPC::STWU ); |
662 | const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX |
663 | : PPC::STWUX); |
664 | const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 |
665 | : PPC::LIS ); |
666 | const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 |
667 | : PPC::ORI ); |
668 | const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 |
669 | : PPC::OR ); |
670 | const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 |
671 | : PPC::SUBFC); |
672 | const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 |
673 | : PPC::SUBFIC); |
674 | const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 |
675 | : PPC::MFCR); |
676 | const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW); |
677 | const MCInstrDesc &HashST = |
678 | TII.get(HasPrivileged ? PPC::HASHSTP : PPC::HASHST); |
679 | |
680 | // Regarding this assert: Even though LR is saved in the caller's frame (i.e., |
681 | // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no |
682 | // Red Zone, an asynchronous event (a form of "callee") could claim a frame & |
683 | // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. |
684 | assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&(static_cast<void> (0)) |
685 | "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.")(static_cast<void> (0)); |
686 | |
687 | // Using the same bool variable as below to suppress compiler warnings. |
688 | bool SingleScratchReg = findScratchRegister( |
689 | &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg); |
690 | assert(SingleScratchReg &&(static_cast<void> (0)) |
691 | "Required number of registers not available in this block")(static_cast<void> (0)); |
692 | |
693 | SingleScratchReg = ScratchReg == TempReg; |
694 | |
695 | int64_t LROffset = getReturnSaveOffset(); |
696 | |
697 | int64_t FPOffset = 0; |
698 | if (HasFP) { |
699 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
700 | int FPIndex = FI->getFramePointerSaveIndex(); |
701 | assert(FPIndex && "No Frame Pointer Save Slot!")(static_cast<void> (0)); |
702 | FPOffset = MFI.getObjectOffset(FPIndex); |
703 | } |
704 | |
705 | int64_t BPOffset = 0; |
706 | if (HasBP) { |
707 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
708 | int BPIndex = FI->getBasePointerSaveIndex(); |
709 | assert(BPIndex && "No Base Pointer Save Slot!")(static_cast<void> (0)); |
710 | BPOffset = MFI.getObjectOffset(BPIndex); |
711 | } |
712 | |
713 | int64_t PBPOffset = 0; |
714 | if (FI->usesPICBase()) { |
715 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
716 | int PBPIndex = FI->getPICBasePointerSaveIndex(); |
717 | assert(PBPIndex && "No PIC Base Pointer Save Slot!")(static_cast<void> (0)); |
718 | PBPOffset = MFI.getObjectOffset(PBPIndex); |
719 | } |
720 | |
721 | // Get stack alignments. |
722 | Align MaxAlign = MFI.getMaxAlign(); |
723 | if (HasBP && MaxAlign > 1) |
724 | assert(Log2(MaxAlign) < 16 && "Invalid alignment!")(static_cast<void> (0)); |
725 | |
726 | // Frames of 32KB & larger require special handling because they cannot be |
727 | // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. |
728 | bool isLargeFrame = !isInt<16>(NegFrameSize); |
729 | |
730 | // Check if we can move the stack update instruction (stdu) down the prologue |
731 | // past the callee saves. Hopefully this will avoid the situation where the |
732 | // saves are waiting for the update on the store with update to complete. |
733 | MachineBasicBlock::iterator StackUpdateLoc = MBBI; |
734 | bool MovingStackUpdateDown = false; |
735 | |
736 | // Check if we can move the stack update. |
737 | if (stackUpdateCanBeMoved(MF)) { |
738 | const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); |
739 | for (CalleeSavedInfo CSI : Info) { |
740 | // If the callee saved register is spilled to a register instead of the |
741 | // stack then the spill no longer uses the stack pointer. |
742 | // This can lead to two consequences: |
743 | // 1) We no longer need to update the stack because the function does not |
744 | // spill any callee saved registers to stack. |
745 | // 2) We have a situation where we still have to update the stack pointer |
746 | // even though some registers are spilled to other registers. In |
747 | // this case the current code moves the stack update to an incorrect |
748 | // position. |
749 | // In either case we should abort moving the stack update operation. |
750 | if (CSI.isSpilledToReg()) { |
751 | StackUpdateLoc = MBBI; |
752 | MovingStackUpdateDown = false; |
753 | break; |
754 | } |
755 | |
756 | int FrIdx = CSI.getFrameIdx(); |
757 | // If the frame index is not negative the callee saved info belongs to a |
758 | // stack object that is not a fixed stack object. We ignore non-fixed |
759 | // stack objects because we won't move the stack update pointer past them. |
760 | if (FrIdx >= 0) |
761 | continue; |
762 | |
763 | if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { |
764 | StackUpdateLoc++; |
765 | MovingStackUpdateDown = true; |
766 | } else { |
767 | // We need all of the Frame Indices to meet these conditions. |
768 | // If they do not, abort the whole operation. |
769 | StackUpdateLoc = MBBI; |
770 | MovingStackUpdateDown = false; |
771 | break; |
772 | } |
773 | } |
774 | |
775 | // If the operation was not aborted then update the object offset. |
776 | if (MovingStackUpdateDown) { |
777 | for (CalleeSavedInfo CSI : Info) { |
778 | int FrIdx = CSI.getFrameIdx(); |
779 | if (FrIdx < 0) |
780 | MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); |
781 | } |
782 | } |
783 | } |
784 | |
785 | // Where in the prologue we move the CR fields depends on how many scratch |
786 | // registers we have, and if we need to save the link register or not. This |
787 | // lambda is to avoid duplicating the logic in 2 places. |
788 | auto BuildMoveFromCR = [&]() { |
789 | if (isELFv2ABI && MustSaveCRs.size() == 1) { |
790 | // In the ELFv2 ABI, we are not required to save all CR fields. |
791 | // If only one CR field is clobbered, it is more efficient to use |
792 | // mfocrf to selectively save just that field, because mfocrf has short |
793 | // latency compared to mfcr. |
794 | assert(isPPC64 && "V2 ABI is 64-bit only.")(static_cast<void> (0)); |
795 | MachineInstrBuilder MIB = |
796 | BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg); |
797 | MIB.addReg(MustSaveCRs[0], RegState::Kill); |
798 | } else { |
799 | MachineInstrBuilder MIB = |
800 | BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg); |
801 | for (unsigned CRfield : MustSaveCRs) |
802 | MIB.addReg(CRfield, RegState::ImplicitKill); |
803 | } |
804 | }; |
805 | |
806 | // If we need to spill the CR and the LR but we don't have two separate |
807 | // registers available, we must spill them one at a time |
808 | if (MustSaveCR && SingleScratchReg && MustSaveLR) { |
809 | BuildMoveFromCR(); |
810 | BuildMI(MBB, MBBI, dl, StoreWordInst) |
811 | .addReg(TempReg, getKillRegState(true)) |
812 | .addImm(CRSaveOffset) |
813 | .addReg(SPReg); |
814 | } |
815 | |
816 | if (MustSaveLR) |
817 | BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); |
818 | |
819 | if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) |
820 | BuildMoveFromCR(); |
821 | |
822 | if (HasRedZone) { |
823 | if (HasFP) |
824 | BuildMI(MBB, MBBI, dl, StoreInst) |
825 | .addReg(FPReg) |
826 | .addImm(FPOffset) |
827 | .addReg(SPReg); |
828 | if (FI->usesPICBase()) |
829 | BuildMI(MBB, MBBI, dl, StoreInst) |
830 | .addReg(PPC::R30) |
831 | .addImm(PBPOffset) |
832 | .addReg(SPReg); |
833 | if (HasBP) |
834 | BuildMI(MBB, MBBI, dl, StoreInst) |
835 | .addReg(BPReg) |
836 | .addImm(BPOffset) |
837 | .addReg(SPReg); |
838 | } |
839 | |
840 | // Generate the instruction to store the LR. In the case where ROP protection |
841 | // is required the register holding the LR should not be killed as it will be |
842 | // used by the hash store instruction. |
843 | if (MustSaveLR) { |
844 | BuildMI(MBB, StackUpdateLoc, dl, StoreInst) |
845 | .addReg(ScratchReg, getKillRegState(!HasROPProtect)) |
846 | .addImm(LROffset) |
847 | .addReg(SPReg); |
848 | |
849 | // Add the ROP protection Hash Store instruction. |
850 | // NOTE: This is technically a violation of the ABI. The hash can be saved |
851 | // up to 512 bytes into the Protected Zone. This can be outside of the |
852 | // initial 288 byte volatile program storage region in the Protected Zone. |
853 | // However, this restriction will be removed in an upcoming revision of the |
854 | // ABI. |
855 | if (HasROPProtect) { |
856 | const int SaveIndex = FI->getROPProtectionHashSaveIndex(); |
857 | const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex); |
858 | assert((ImmOffset <= -8 && ImmOffset >= -512) &&(static_cast<void> (0)) |
859 | "ROP hash save offset out of range.")(static_cast<void> (0)); |
860 | assert(((ImmOffset & 0x7) == 0) &&(static_cast<void> (0)) |
861 | "ROP hash save offset must be 8 byte aligned.")(static_cast<void> (0)); |
862 | BuildMI(MBB, StackUpdateLoc, dl, HashST) |
863 | .addReg(ScratchReg, getKillRegState(true)) |
864 | .addImm(ImmOffset) |
865 | .addReg(SPReg); |
866 | } |
867 | } |
868 | |
869 | if (MustSaveCR && |
870 | !(SingleScratchReg && MustSaveLR)) { |
871 | assert(HasRedZone && "A red zone is always available on PPC64")(static_cast<void> (0)); |
872 | BuildMI(MBB, MBBI, dl, StoreWordInst) |
873 | .addReg(TempReg, getKillRegState(true)) |
874 | .addImm(CRSaveOffset) |
875 | .addReg(SPReg); |
876 | } |
877 | |
878 | // Skip the rest if this is a leaf function & all spills fit in the Red Zone. |
879 | if (!FrameSize) |
880 | return; |
881 | |
882 | // Adjust stack pointer: r1 += NegFrameSize. |
883 | // If there is a preferred stack alignment, align R1 now |
884 | |
885 | if (HasBP && HasRedZone) { |
886 | // Save a copy of r1 as the base pointer. |
887 | BuildMI(MBB, MBBI, dl, OrInst, BPReg) |
888 | .addReg(SPReg) |
889 | .addReg(SPReg); |
890 | } |
891 | |
892 | // Have we generated a STUX instruction to claim stack frame? If so, |
893 | // the negated frame size will be placed in ScratchReg. |
894 | bool HasSTUX = false; |
895 | |
896 | // If FrameSize <= TLI.getStackProbeSize(MF), we get a free probe from the |
897 | // essential STU(X) instruction, because the POWER ABI requires the backchain |
898 | // pointer to always be stored at SP. |
899 | if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) { |
900 | // To be consistent with other targets, a pseudo instruction is emitted and |
901 | // will be later expanded in `inlineStackProbe`. |
902 | BuildMI(MBB, MBBI, dl, |
903 | TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64 |
904 | : PPC::PROBED_STACKALLOC_32)) |
905 | .addDef(TempReg) |
906 | .addDef(ScratchReg) // ScratchReg stores the old sp. |
907 | .addImm(NegFrameSize); |
908 | // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we |
909 | // update the ScratchReg to meet the assumption that ScratchReg contains |
910 | // the NegFrameSize. This solution is rather tricky. |
911 | if (!HasRedZone) { |
912 | BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) |
913 | .addReg(ScratchReg) |
914 | .addReg(SPReg); |
915 | HasSTUX = true; |
916 | } |
917 | } else { |
918 | // This condition must be kept in sync with canUseAsPrologue. |
919 | if (HasBP && MaxAlign > 1) { |
920 | if (isPPC64) |
921 | BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) |
922 | .addReg(SPReg) |
923 | .addImm(0) |
924 | .addImm(64 - Log2(MaxAlign)); |
925 | else // PPC32... |
926 | BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) |
927 | .addReg(SPReg) |
928 | .addImm(0) |
929 | .addImm(32 - Log2(MaxAlign)) |
930 | .addImm(31); |
931 | if (!isLargeFrame) { |
932 | BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) |
933 | .addReg(ScratchReg, RegState::Kill) |
934 | .addImm(NegFrameSize); |
935 | } else { |
936 | assert(!SingleScratchReg && "Only a single scratch reg available")(static_cast<void> (0)); |
937 | BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) |
938 | .addImm(NegFrameSize >> 16); |
939 | BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) |
940 | .addReg(TempReg, RegState::Kill) |
941 | .addImm(NegFrameSize & 0xFFFF); |
942 | BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) |
943 | .addReg(ScratchReg, RegState::Kill) |
944 | .addReg(TempReg, RegState::Kill); |
945 | } |
946 | |
947 | BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) |
948 | .addReg(SPReg, RegState::Kill) |
949 | .addReg(SPReg) |
950 | .addReg(ScratchReg); |
951 | HasSTUX = true; |
952 | |
953 | } else if (!isLargeFrame) { |
954 | BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) |
955 | .addReg(SPReg) |
956 | .addImm(NegFrameSize) |
957 | .addReg(SPReg); |
958 | |
959 | } else { |
960 | BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) |
961 | .addImm(NegFrameSize >> 16); |
962 | BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) |
963 | .addReg(ScratchReg, RegState::Kill) |
964 | .addImm(NegFrameSize & 0xFFFF); |
965 | BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) |
966 | .addReg(SPReg, RegState::Kill) |
967 | .addReg(SPReg) |
968 | .addReg(ScratchReg); |
969 | HasSTUX = true; |
970 | } |
971 | } |
972 | |
973 | // Save the TOC register after the stack pointer update if a prologue TOC |
974 | // save is required for the function. |
975 | if (MustSaveTOC) { |
976 | assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2")(static_cast<void> (0)); |
977 | BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) |
978 | .addReg(TOCReg, getKillRegState(true)) |
979 | .addImm(TOCSaveOffset) |
980 | .addReg(SPReg); |
981 | } |
982 | |
983 | if (!HasRedZone) { |
984 | assert(!isPPC64 && "A red zone is always available on PPC64")(static_cast<void> (0)); |
985 | if (HasSTUX) { |
986 | // The negated frame size is in ScratchReg, and the SPReg has been |
987 | // decremented by the frame size: SPReg = old SPReg + ScratchReg. |
988 | // Since FPOffset, PBPOffset, etc. are relative to the beginning of |
989 | // the stack frame (i.e. the old SP), ideally, we would put the old |
990 | // SP into a register and use it as the base for the stores. The |
991 | // problem is that the only available register may be ScratchReg, |
992 | // which could be R0, and R0 cannot be used as a base address. |
993 | |
994 | // First, set ScratchReg to the old SP. This may need to be modified |
995 | // later. |
996 | BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) |
997 | .addReg(ScratchReg, RegState::Kill) |
998 | .addReg(SPReg); |
999 | |
1000 | if (ScratchReg == PPC::R0) { |
1001 | // R0 cannot be used as a base register, but it can be used as an |
1002 | // index in a store-indexed. |
1003 | int LastOffset = 0; |
1004 | if (HasFP) { |
1005 | // R0 += (FPOffset-LastOffset). |
1006 | // Need addic, since addi treats R0 as 0. |
1007 | BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) |
1008 | .addReg(ScratchReg) |
1009 | .addImm(FPOffset-LastOffset); |
1010 | LastOffset = FPOffset; |
1011 | // Store FP into *R0. |
1012 | BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) |
1013 | .addReg(FPReg, RegState::Kill) // Save FP. |
1014 | .addReg(PPC::ZERO) |
1015 | .addReg(ScratchReg); // This will be the index (R0 is ok here). |
1016 | } |
1017 | if (FI->usesPICBase()) { |
1018 | // R0 += (PBPOffset-LastOffset). |
1019 | BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) |
1020 | .addReg(ScratchReg) |
1021 | .addImm(PBPOffset-LastOffset); |
1022 | LastOffset = PBPOffset; |
1023 | BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) |
1024 | .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. |
1025 | .addReg(PPC::ZERO) |
1026 | .addReg(ScratchReg); // This will be the index (R0 is ok here). |
1027 | } |
1028 | if (HasBP) { |
1029 | // R0 += (BPOffset-LastOffset). |
1030 | BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) |
1031 | .addReg(ScratchReg) |
1032 | .addImm(BPOffset-LastOffset); |
1033 | LastOffset = BPOffset; |
1034 | BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) |
1035 | .addReg(BPReg, RegState::Kill) // Save BP. |
1036 | .addReg(PPC::ZERO) |
1037 | .addReg(ScratchReg); // This will be the index (R0 is ok here). |
1038 | // BP = R0-LastOffset |
1039 | BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) |
1040 | .addReg(ScratchReg, RegState::Kill) |
1041 | .addImm(-LastOffset); |
1042 | } |
1043 | } else { |
1044 | // ScratchReg is not R0, so use it as the base register. It is |
1045 | // already set to the old SP, so we can use the offsets directly. |
1046 | |
1047 | // Now that the stack frame has been allocated, save all the necessary |
1048 | // registers using ScratchReg as the base address. |
1049 | if (HasFP) |
1050 | BuildMI(MBB, MBBI, dl, StoreInst) |
1051 | .addReg(FPReg) |
1052 | .addImm(FPOffset) |
1053 | .addReg(ScratchReg); |
1054 | if (FI->usesPICBase()) |
1055 | BuildMI(MBB, MBBI, dl, StoreInst) |
1056 | .addReg(PPC::R30) |
1057 | .addImm(PBPOffset) |
1058 | .addReg(ScratchReg); |
1059 | if (HasBP) { |
1060 | BuildMI(MBB, MBBI, dl, StoreInst) |
1061 | .addReg(BPReg) |
1062 | .addImm(BPOffset) |
1063 | .addReg(ScratchReg); |
1064 | BuildMI(MBB, MBBI, dl, OrInst, BPReg) |
1065 | .addReg(ScratchReg, RegState::Kill) |
1066 | .addReg(ScratchReg); |
1067 | } |
1068 | } |
1069 | } else { |
1070 | // The frame size is a known 16-bit constant (fitting in the immediate |
1071 | // field of STWU). To be here we have to be compiling for PPC32. |
1072 | // Since the SPReg has been decreased by FrameSize, add it back to each |
1073 | // offset. |
1074 | if (HasFP) |
1075 | BuildMI(MBB, MBBI, dl, StoreInst) |
1076 | .addReg(FPReg) |
1077 | .addImm(FrameSize + FPOffset) |
1078 | .addReg(SPReg); |
1079 | if (FI->usesPICBase()) |
1080 | BuildMI(MBB, MBBI, dl, StoreInst) |
1081 | .addReg(PPC::R30) |
1082 | .addImm(FrameSize + PBPOffset) |
1083 | .addReg(SPReg); |
1084 | if (HasBP) { |
1085 | BuildMI(MBB, MBBI, dl, StoreInst) |
1086 | .addReg(BPReg) |
1087 | .addImm(FrameSize + BPOffset) |
1088 | .addReg(SPReg); |
1089 | BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) |
1090 | .addReg(SPReg) |
1091 | .addImm(FrameSize); |
1092 | } |
1093 | } |
1094 | } |
1095 | |
1096 | // Add Call Frame Information for the instructions we generated above. |
1097 | if (needsCFI) { |
1098 | unsigned CFIIndex; |
1099 | |
1100 | if (HasBP) { |
1101 | // Define CFA in terms of BP. Do this in preference to using FP/SP, |
1102 | // because if the stack needed aligning then CFA won't be at a fixed |
1103 | // offset from FP/SP. |
1104 | unsigned Reg = MRI->getDwarfRegNum(BPReg, true); |
1105 | CFIIndex = MF.addFrameInst( |
1106 | MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); |
1107 | } else { |
1108 | // Adjust the definition of CFA to account for the change in SP. |
1109 | assert(NegFrameSize)(static_cast<void> (0)); |
1110 | CFIIndex = MF.addFrameInst( |
1111 | MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize)); |
1112 | } |
1113 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1114 | .addCFIIndex(CFIIndex); |
1115 | |
1116 | if (HasFP) { |
1117 | // Describe where FP was saved, at a fixed offset from CFA. |
1118 | unsigned Reg = MRI->getDwarfRegNum(FPReg, true); |
1119 | CFIIndex = MF.addFrameInst( |
1120 | MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); |
1121 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1122 | .addCFIIndex(CFIIndex); |
1123 | } |
1124 | |
1125 | if (FI->usesPICBase()) { |
1126 | // Describe where the PIC base (R30) was saved, at a fixed offset from CFA. |
1127 | unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); |
1128 | CFIIndex = MF.addFrameInst( |
1129 | MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); |
1130 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1131 | .addCFIIndex(CFIIndex); |
1132 | } |
1133 | |
1134 | if (HasBP) { |
1135 | // Describe where BP was saved, at a fixed offset from CFA. |
1136 | unsigned Reg = MRI->getDwarfRegNum(BPReg, true); |
1137 | CFIIndex = MF.addFrameInst( |
1138 | MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); |
1139 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1140 | .addCFIIndex(CFIIndex); |
1141 | } |
1142 | |
1143 | if (MustSaveLR) { |
1144 | // Describe where LR was saved, at a fixed offset from CFA. |
1145 | unsigned Reg = MRI->getDwarfRegNum(LRReg, true); |
1146 | CFIIndex = MF.addFrameInst( |
1147 | MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); |
1148 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1149 | .addCFIIndex(CFIIndex); |
1150 | } |
1151 | } |
1152 | |
1153 | // If there is a frame pointer, copy R1 into R31 |
1154 | if (HasFP) { |
1155 | BuildMI(MBB, MBBI, dl, OrInst, FPReg) |
1156 | .addReg(SPReg) |
1157 | .addReg(SPReg); |
1158 | |
1159 | if (!HasBP && needsCFI) { |
1160 | // Change the definition of CFA from SP+offset to FP+offset, because SP |
1161 | // will change at every alloca. |
1162 | unsigned Reg = MRI->getDwarfRegNum(FPReg, true); |
1163 | unsigned CFIIndex = MF.addFrameInst( |
1164 | MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); |
1165 | |
1166 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1167 | .addCFIIndex(CFIIndex); |
1168 | } |
1169 | } |
1170 | |
1171 | if (needsCFI) { |
1172 | // Describe where callee saved registers were saved, at fixed offsets from |
1173 | // CFA. |
1174 | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
1175 | for (unsigned I = 0, E = CSI.size(); I != E; ++I) { |
1176 | unsigned Reg = CSI[I].getReg(); |
1177 | if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; |
1178 | |
1179 | // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just |
1180 | // subregisters of CR2. We just need to emit a move of CR2. |
1181 | if (PPC::CRBITRCRegClass.contains(Reg)) |
1182 | continue; |
1183 | |
1184 | if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) |
1185 | continue; |
1186 | |
1187 | // For SVR4, don't emit a move for the CR spill slot if we haven't |
1188 | // spilled CRs. |
1189 | if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) |
1190 | && !MustSaveCR) |
1191 | continue; |
1192 | |
1193 | // For 64-bit SVR4 when we have spilled CRs, the spill location |
1194 | // is SP+8, not a frame-relative slot. |
1195 | if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { |
1196 | // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for |
1197 | // the whole CR word. In the ELFv2 ABI, every CR that was |
1198 | // actually saved gets its own CFI record. |
1199 | unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; |
1200 | unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( |
1201 | nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset)); |
1202 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1203 | .addCFIIndex(CFIIndex); |
1204 | continue; |
1205 | } |
1206 | |
1207 | if (CSI[I].isSpilledToReg()) { |
1208 | unsigned SpilledReg = CSI[I].getDstReg(); |
1209 | unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( |
1210 | nullptr, MRI->getDwarfRegNum(Reg, true), |
1211 | MRI->getDwarfRegNum(SpilledReg, true))); |
1212 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1213 | .addCFIIndex(CFIRegister); |
1214 | } else { |
1215 | int64_t Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); |
1216 | // We have changed the object offset above but we do not want to change |
1217 | // the actual offsets in the CFI instruction so we have to undo the |
1218 | // offset change here. |
1219 | if (MovingStackUpdateDown) |
1220 | Offset -= NegFrameSize; |
1221 | |
1222 | unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( |
1223 | nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); |
1224 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1225 | .addCFIIndex(CFIIndex); |
1226 | } |
1227 | } |
1228 | } |
1229 | } |
1230 | |
1231 | void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, |
1232 | MachineBasicBlock &PrologMBB) const { |
1233 | bool isPPC64 = Subtarget.isPPC64(); |
1234 | const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); |
1235 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
1236 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1237 | MachineModuleInfo &MMI = MF.getMMI(); |
1238 | const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); |
1239 | // AIX assembler does not support cfi directives. |
1240 | const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); |
1241 | auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) { |
1242 | int Opc = MI.getOpcode(); |
1243 | return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32; |
1244 | }); |
1245 | if (StackAllocMIPos == PrologMBB.end()) |
1246 | return; |
1247 | const BasicBlock *ProbedBB = PrologMBB.getBasicBlock(); |
1248 | MachineBasicBlock *CurrentMBB = &PrologMBB; |
1249 | DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos); |
1250 | MachineInstr &MI = *StackAllocMIPos; |
1251 | int64_t NegFrameSize = MI.getOperand(2).getImm(); |
1252 | unsigned ProbeSize = TLI.getStackProbeSize(MF); |
1253 | int64_t NegProbeSize = -(int64_t)ProbeSize; |
1254 | assert(isInt<32>(NegProbeSize) && "Unhandled probe size")(static_cast<void> (0)); |
1255 | int64_t NumBlocks = NegFrameSize / NegProbeSize; |
1256 | int64_t NegResidualSize = NegFrameSize % NegProbeSize; |
1257 | Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; |
1258 | Register ScratchReg = MI.getOperand(0).getReg(); |
1259 | Register FPReg = MI.getOperand(1).getReg(); |
1260 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
1261 | bool HasBP = RegInfo->hasBasePointer(MF); |
1262 | Register BPReg = RegInfo->getBaseRegister(MF); |
1263 | Align MaxAlign = MFI.getMaxAlign(); |
1264 | bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); |
1265 | const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR); |
1266 | // Subroutines to generate .cfi_* directives. |
1267 | auto buildDefCFAReg = [&](MachineBasicBlock &MBB, |
1268 | MachineBasicBlock::iterator MBBI, Register Reg) { |
1269 | unsigned RegNum = MRI->getDwarfRegNum(Reg, true); |
1270 | unsigned CFIIndex = MF.addFrameInst( |
1271 | MCCFIInstruction::createDefCfaRegister(nullptr, RegNum)); |
1272 | BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1273 | .addCFIIndex(CFIIndex); |
1274 | }; |
1275 | auto buildDefCFA = [&](MachineBasicBlock &MBB, |
1276 | MachineBasicBlock::iterator MBBI, Register Reg, |
1277 | int Offset) { |
1278 | unsigned RegNum = MRI->getDwarfRegNum(Reg, true); |
1279 | unsigned CFIIndex = MBB.getParent()->addFrameInst( |
1280 | MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset)); |
1281 | BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1282 | .addCFIIndex(CFIIndex); |
1283 | }; |
1284 | // Subroutine to determine if we can use the Imm as part of d-form. |
1285 | auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; }; |
1286 | // Subroutine to materialize the Imm into TempReg. |
1287 | auto MaterializeImm = [&](MachineBasicBlock &MBB, |
1288 | MachineBasicBlock::iterator MBBI, int64_t Imm, |
1289 | Register &TempReg) { |
1290 | assert(isInt<32>(Imm) && "Unhandled imm")(static_cast<void> (0)); |
1291 | if (isInt<16>(Imm)) |
1292 | BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg) |
1293 | .addImm(Imm); |
1294 | else { |
1295 | BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg) |
1296 | .addImm(Imm >> 16); |
1297 | BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg) |
1298 | .addReg(TempReg) |
1299 | .addImm(Imm & 0xFFFF); |
1300 | } |
1301 | }; |
1302 | // Subroutine to store frame pointer and decrease stack pointer by probe size. |
1303 | auto allocateAndProbe = [&](MachineBasicBlock &MBB, |
1304 | MachineBasicBlock::iterator MBBI, int64_t NegSize, |
1305 | Register NegSizeReg, bool UseDForm, |
1306 | Register StoreReg) { |
1307 | if (UseDForm) |
1308 | BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg) |
1309 | .addReg(StoreReg) |
1310 | .addImm(NegSize) |
1311 | .addReg(SPReg); |
1312 | else |
1313 | BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg) |
1314 | .addReg(StoreReg) |
1315 | .addReg(SPReg) |
1316 | .addReg(NegSizeReg); |
1317 | }; |
1318 | // Used to probe stack when realignment is required. |
1319 | // Note that, according to the ABI's requirement, *sp must always equal the |
1320 | // value of the back-chain pointer, so only st(w|d)u(x) can be used to update sp. |
1321 | // Following is pseudo code: |
1322 | // final_sp = (sp & align) + negframesize; |
1323 | // neg_gap = final_sp - sp; |
1324 | // while (neg_gap < negprobesize) { |
1325 | // stdu fp, negprobesize(sp); |
1326 | // neg_gap -= negprobesize; |
1327 | // } |
1328 | // stdux fp, sp, neg_gap |
1329 | // |
1330 | // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg |
1331 | // before probe code, we don't need to save it, so we get one additional reg |
1332 | // that can be used to materialize the probesize if needed to use xform. |
1333 | // Otherwise, we can NOT materialize probesize, so we can only use Dform for |
1334 | // now. |
1335 | // |
1336 | // The allocations are: |
1337 | // if (HasBP && HasRedzone) { |
1338 | // r0: materialize the probesize if needed so that we can use xform. |
1339 | // r12: `neg_gap` |
1340 | // } else { |
1341 | // r0: back-chain pointer |
1342 | // r12: `neg_gap`. |
1343 | // } |
1344 | auto probeRealignedStack = [&](MachineBasicBlock &MBB, |
1345 | MachineBasicBlock::iterator MBBI, |
1346 | Register ScratchReg, Register TempReg) { |
1347 | assert(HasBP && "The function is supposed to have base pointer when its "(static_cast<void> (0)) |
1348 | "stack is realigned.")(static_cast<void> (0)); |
1349 | assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2")(static_cast<void> (0)); |
1350 | |
1351 | // FIXME: We can eliminate this limitation if we get more information about |
1352 | // which parts of the red zone are already used. The used part of the red zone |
1353 | // can be treated as probed, but there might be `holes' in the probed red zone, |
1354 | // which could complicate the implementation. |
1355 | assert(ProbeSize >= Subtarget.getRedZoneSize() &&(static_cast<void> (0)) |
1356 | "Probe size should be larger or equal to the size of red-zone so "(static_cast<void> (0)) |
1357 | "that red-zone is not clobbered by probing.")(static_cast<void> (0)); |
1358 | |
1359 | Register &FinalStackPtr = TempReg; |
1360 | // FIXME: We only support NegProbeSize materializable by DForm currently. |
1361 | // When HasBP && HasRedzone, we can use xform if we have an additional idle |
1362 | // register. |
1363 | NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15)); |
1364 | assert(isInt<16>(NegProbeSize) &&(static_cast<void> (0)) |
1365 | "NegProbeSize should be materializable by DForm")(static_cast<void> (0)); |
1366 | Register CRReg = PPC::CR0; |
1367 | // Layout of output assembly kinda like: |
1368 | // bb.0: |
1369 | // ... |
1370 | // sub $scratchreg, $finalsp, r1 |
1371 | // cmpdi $scratchreg, <negprobesize> |
1372 | // bge bb.2 |
1373 | // bb.1: |
1374 | // stdu <backchain>, <negprobesize>(r1) |
1375 | // sub $scratchreg, $scratchreg, negprobesize |
1376 | // cmpdi $scratchreg, <negprobesize> |
1377 | // blt bb.1 |
1378 | // bb.2: |
1379 | // stdux <backchain>, r1, $scratchreg |
1380 | MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); |
1381 | MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB); |
1382 | MF.insert(MBBInsertPoint, ProbeLoopBodyMBB); |
1383 | MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB); |
1384 | MF.insert(MBBInsertPoint, ProbeExitMBB); |
1385 | // bb.2 |
1386 | { |
1387 | Register BackChainPointer = HasRedZone ? BPReg : TempReg; |
1388 | allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false, |
1389 | BackChainPointer); |
1390 | if (HasRedZone) |
1391 | // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg |
1392 | // to TempReg to satisfy it. |
1393 | BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg) |
1394 | .addReg(BPReg) |
1395 | .addReg(BPReg); |
1396 | ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); |
1397 | ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); |
1398 | } |
1399 | // bb.0 |
1400 | { |
1401 | BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg) |
1402 | .addReg(SPReg) |
1403 | .addReg(FinalStackPtr); |
1404 | if (!HasRedZone) |
1405 | BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg); |
1406 | BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg) |
1407 | .addReg(ScratchReg) |
1408 | .addImm(NegProbeSize); |
1409 | BuildMI(&MBB, DL, TII.get(PPC::BCC)) |
1410 | .addImm(PPC::PRED_GE) |
1411 | .addReg(CRReg) |
1412 | .addMBB(ProbeExitMBB); |
1413 | MBB.addSuccessor(ProbeLoopBodyMBB); |
1414 | MBB.addSuccessor(ProbeExitMBB); |
1415 | } |
1416 | // bb.1 |
1417 | { |
1418 | Register BackChainPointer = HasRedZone ? BPReg : TempReg; |
1419 | allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize, |
1420 | 0, true /*UseDForm*/, BackChainPointer); |
1421 | BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI), |
1422 | ScratchReg) |
1423 | .addReg(ScratchReg) |
1424 | .addImm(-NegProbeSize); |
1425 | BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), |
1426 | CRReg) |
1427 | .addReg(ScratchReg) |
1428 | .addImm(NegProbeSize); |
1429 | BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC)) |
1430 | .addImm(PPC::PRED_LT) |
1431 | .addReg(CRReg) |
1432 | .addMBB(ProbeLoopBodyMBB); |
1433 | ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); |
1434 | ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); |
1435 | } |
1436 | // Update liveins. |
1437 | recomputeLiveIns(*ProbeLoopBodyMBB); |
1438 | recomputeLiveIns(*ProbeExitMBB); |
1439 | return ProbeExitMBB; |
1440 | }; |
1441 | // For case HasBP && MaxAlign > 1, we have to realign the SP by performing |
1442 | // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since |
1443 | // the offset subtracted from SP is determined by SP's runtime value. |
1444 | if (HasBP && MaxAlign > 1) { |
1445 | // Calculate final stack pointer. |
1446 | if (isPPC64) |
1447 | BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg) |
1448 | .addReg(SPReg) |
1449 | .addImm(0) |
1450 | .addImm(64 - Log2(MaxAlign)); |
1451 | else |
1452 | BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) |
1453 | .addReg(SPReg) |
1454 | .addImm(0) |
1455 | .addImm(32 - Log2(MaxAlign)) |
1456 | .addImm(31); |
1457 | BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), |
1458 | FPReg) |
1459 | .addReg(ScratchReg) |
1460 | .addReg(SPReg); |
1461 | MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg); |
1462 | BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4), |
1463 | FPReg) |
1464 | .addReg(ScratchReg) |
1465 | .addReg(FPReg); |
1466 | CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg); |
1467 | if (needsCFI) |
1468 | buildDefCFAReg(*CurrentMBB, {MI}, FPReg); |
1469 | } else { |
1470 | // Initialize current frame pointer. |
1471 | BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); |
1472 | // Use FPReg to calculate CFA. |
1473 | if (needsCFI) |
1474 | buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); |
1475 | // Probe residual part. |
1476 | if (NegResidualSize) { |
1477 | bool ResidualUseDForm = CanUseDForm(NegResidualSize); |
1478 | if (!ResidualUseDForm) |
1479 | MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); |
1480 | allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, |
1481 | ResidualUseDForm, FPReg); |
1482 | } |
1483 | bool UseDForm = CanUseDForm(NegProbeSize); |
1484 | // If number of blocks is small, just probe them directly. |
1485 | if (NumBlocks < 3) { |
1486 | if (!UseDForm) |
1487 | MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); |
1488 | for (int i = 0; i < NumBlocks; ++i) |
1489 | allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, |
1490 | FPReg); |
1491 | if (needsCFI) { |
1492 | // Restore using SPReg to calculate CFA. |
1493 | buildDefCFAReg(*CurrentMBB, {MI}, SPReg); |
1494 | } |
1495 | } else { |
1496 | // Since CTR is a volatile register and current shrinkwrap implementation |
1497 | // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a |
1498 | // CTR loop to probe. |
1499 | // Calculate trip count and stores it in CTRReg. |
1500 | MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); |
1501 | BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) |
1502 | .addReg(ScratchReg, RegState::Kill); |
1503 | if (!UseDForm) |
1504 | MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); |
1505 | // Create MBBs of the loop. |
1506 | MachineFunction::iterator MBBInsertPoint = |
1507 | std::next(CurrentMBB->getIterator()); |
1508 | MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); |
1509 | MF.insert(MBBInsertPoint, LoopMBB); |
1510 | MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); |
1511 | MF.insert(MBBInsertPoint, ExitMBB); |
1512 | // Synthesize the loop body. |
1513 | allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, |
1514 | UseDForm, FPReg); |
1515 | BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) |
1516 | .addMBB(LoopMBB); |
1517 | LoopMBB->addSuccessor(ExitMBB); |
1518 | LoopMBB->addSuccessor(LoopMBB); |
1519 | // Synthesize the exit MBB. |
1520 | ExitMBB->splice(ExitMBB->end(), CurrentMBB, |
1521 | std::next(MachineBasicBlock::iterator(MI)), |
1522 | CurrentMBB->end()); |
1523 | ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); |
1524 | CurrentMBB->addSuccessor(LoopMBB); |
1525 | if (needsCFI) { |
1526 | // Restore using SPReg to calculate CFA. |
1527 | buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); |
1528 | } |
1529 | // Update liveins. |
1530 | recomputeLiveIns(*LoopMBB); |
1531 | recomputeLiveIns(*ExitMBB); |
1532 | } |
1533 | } |
1534 | ++NumPrologProbed; |
1535 | MI.eraseFromParent(); |
1536 | } |
1537 | |
// Emits the epilogue for MF into MBB. New instructions are inserted in front
// of MBB's first terminator (MBBI) or at StackUpdateLoc, which may be hoisted
// above the callee-save restores. The code restores SP and, as required, LR,
// the saved CR fields, the frame pointer (R31/X31), the base pointer and the
// PIC base (R30). For return blocks it finally either pops the
// caller-allocated area (fastcc BLR under GuaranteedTailCallOpt) or calls
// createTailCallBranchInstr().
1538 | void PPCFrameLowering::emitEpilogue(MachineFunction &MF, |
1539 | MachineBasicBlock &MBB) const { |
1540 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
1541 | DebugLoc dl; |
1542 | |
1543 | if (MBBI != MBB.end()) |
1544 | dl = MBBI->getDebugLoc(); |
1545 | |
1546 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
1547 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
1548 | |
1549 | // Get alignment info so we know how to restore the SP. |
1550 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
1551 | |
1552 | // Get the number of bytes allocated from the FrameInfo. |
1553 | int64_t FrameSize = MFI.getStackSize(); |
1554 | |
1555 | // Get processor type. |
1556 | bool isPPC64 = Subtarget.isPPC64(); |
1557 | |
1558 | // Check if the link register (LR) has been saved. |
1559 | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
1560 | bool MustSaveLR = FI->mustSaveLR(); |
1561 | const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); |
1562 | bool MustSaveCR = !MustSaveCRs.empty(); |
1563 | // Do we have a frame pointer and/or base pointer for this function? |
1564 | bool HasFP = hasFP(MF); |
1565 | bool HasBP = RegInfo->hasBasePointer(MF); |
1566 | bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); |
1567 | bool HasROPProtect = Subtarget.hasROPProtect(); |
1568 | bool HasPrivileged = Subtarget.hasPrivileged(); |
1569 | |
1570 | Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; |
1571 | Register BPReg = RegInfo->getBaseRegister(MF); |
1572 | Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; |
1573 | Register ScratchReg; |
1574 | Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg |
// Select the 64-bit or 32-bit opcode variants once, up front.
1575 | const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 |
1576 | : PPC::MTLR ); |
1577 | const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD |
1578 | : PPC::LWZ ); |
1579 | const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 |
1580 | : PPC::LIS ); |
1581 | const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 |
1582 | : PPC::OR ); |
1583 | const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 |
1584 | : PPC::ORI ); |
1585 | const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 |
1586 | : PPC::ADDI ); |
1587 | const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 |
1588 | : PPC::ADD4 ); |
1589 | const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8 |
1590 | : PPC::LWZ); |
1591 | const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8 |
1592 | : PPC::MTOCRF); |
1593 | const MCInstrDesc &HashChk = |
1594 | TII.get(HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK); |
1595 | int64_t LROffset = getReturnSaveOffset(); |
1596 | |
1597 | int64_t FPOffset = 0; |
1598 | |
1599 | // Using the same bool variable as below to suppress compiler warnings. |
// NOTE(review): assert() is expanded to a no-op in this listing, so the value
// assigned here is never read before the reassignment below overwrites it;
// that is the dead store the analyzer diagnostic on the next row reports.
1600 | bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, |
Value stored to 'SingleScratchReg' during its initialization is never read | |
1601 | &TempReg); |
1602 | assert(SingleScratchReg &&(static_cast<void> (0)) |
1603 | "Could not find an available scratch register")(static_cast<void> (0)); |
1604 | |
// From here on the flag means "only one distinct scratch register is available".
1605 | SingleScratchReg = ScratchReg == TempReg; |
1606 | |
1607 | if (HasFP) { |
1608 | int FPIndex = FI->getFramePointerSaveIndex(); |
1609 | assert(FPIndex && "No Frame Pointer Save Slot!")(static_cast<void> (0)); |
1610 | FPOffset = MFI.getObjectOffset(FPIndex); |
1611 | } |
1612 | |
1613 | int64_t BPOffset = 0; |
1614 | if (HasBP) { |
1615 | int BPIndex = FI->getBasePointerSaveIndex(); |
1616 | assert(BPIndex && "No Base Pointer Save Slot!")(static_cast<void> (0)); |
1617 | BPOffset = MFI.getObjectOffset(BPIndex); |
1618 | } |
1619 | |
1620 | int64_t PBPOffset = 0; |
1621 | if (FI->usesPICBase()) { |
1622 | int PBPIndex = FI->getPICBasePointerSaveIndex(); |
1623 | assert(PBPIndex && "No PIC Base Pointer Save Slot!")(static_cast<void> (0)); |
1624 | PBPOffset = MFI.getObjectOffset(PBPIndex); |
1625 | } |
1626 | |
1627 | bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); |
1628 | |
// For TCRETURN* pseudos, fold the tail-call stack adjustment (operand 1) into
// FrameSize before any SP-restoring code is generated.
1629 | if (IsReturnBlock) { |
1630 | unsigned RetOpcode = MBBI->getOpcode(); |
1631 | bool UsesTCRet = RetOpcode == PPC::TCRETURNri || |
1632 | RetOpcode == PPC::TCRETURNdi || |
1633 | RetOpcode == PPC::TCRETURNai || |
1634 | RetOpcode == PPC::TCRETURNri8 || |
1635 | RetOpcode == PPC::TCRETURNdi8 || |
1636 | RetOpcode == PPC::TCRETURNai8; |
1637 | |
1638 | if (UsesTCRet) { |
1639 | int MaxTCRetDelta = FI->getTailCallSPDelta(); |
1640 | MachineOperand &StackAdjust = MBBI->getOperand(1); |
1641 | assert(StackAdjust.isImm() && "Expecting immediate value.")(static_cast<void> (0)); |
1642 | // Adjust stack pointer. |
1643 | int StackAdj = StackAdjust.getImm(); |
1644 | int Delta = StackAdj - MaxTCRetDelta; |
1645 | assert((Delta >= 0) && "Delta must be positive")(static_cast<void> (0)); |
1646 | if (MaxTCRetDelta>0) |
1647 | FrameSize += (StackAdj +Delta); |
1648 | else |
1649 | FrameSize += StackAdj; |
1650 | } |
1651 | } |
1652 | |
1653 | // Frames of 32KB & larger require special handling because they cannot be |
1654 | // indexed into with a simple LD/LWZ immediate offset operand. |
1655 | bool isLargeFrame = !isInt<16>(FrameSize); |
1656 | |
1657 | // On targets without red zone, the SP needs to be restored last, so that |
1658 | // all live contents of the stack frame are upwards of the SP. This means |
1659 | // that we cannot restore SP just now, since there may be more registers |
1660 | // to restore from the stack frame (e.g. R31). If the frame size is not |
1661 | // a simple immediate value, we will need a spare register to hold the |
1662 | // restored SP. If the frame size is known and small, we can simply adjust |
1663 | // the offsets of the registers to be restored, and still use SP to restore |
1664 | // them. In such case, the final update of SP will be to add the frame |
1665 | // size to it. |
1666 | // To simplify the code, set RBReg to the base register used to restore |
1667 | // values from the stack, and set SPAdd to the value that needs to be added |
1668 | // to the SP at the end. The default values are as if red zone was present. |
1669 | unsigned RBReg = SPReg; |
1670 | unsigned SPAdd = 0; |
1671 | |
1672 | // Check if we can move the stack update instruction up the epilogue |
1673 | // past the callee saves. This will allow the move to LR instruction |
1674 | // to be executed before the restores of the callee saves which means |
1675 | // that the callee saves can hide the latency from the MTLR instruction. |
1676 | MachineBasicBlock::iterator StackUpdateLoc = MBBI; |
1677 | if (stackUpdateCanBeMoved(MF)) { |
1678 | const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); |
1679 | for (CalleeSavedInfo CSI : Info) { |
1680 | // If the callee saved register is spilled to another register abort the |
1681 | // stack update movement. |
1682 | if (CSI.isSpilledToReg()) { |
1683 | StackUpdateLoc = MBBI; |
1684 | break; |
1685 | } |
1686 | int FrIdx = CSI.getFrameIdx(); |
1687 | // If the frame index is not negative the callee saved info belongs to a |
1688 | // stack object that is not a fixed stack object. We ignore non-fixed |
1689 | // stack objects because we won't move the update of the stack pointer |
1690 | // past them. |
1691 | if (FrIdx >= 0) |
1692 | continue; |
1693 | |
1694 | if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) |
1695 | StackUpdateLoc--; |
1696 | else { |
1697 | // Abort the operation as we can't update all CSR restores. |
1698 | StackUpdateLoc = MBBI; |
1699 | break; |
1700 | } |
1701 | } |
1702 | } |
1703 | |
1704 | if (FrameSize) { |
1705 | // In the prologue, the loaded (or persistent) stack pointer value is |
1706 | // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red |
1707 | // zone add this offset back now. |
1708 | |
1709 | // If the function has a base pointer, the stack pointer has been copied |
1710 | // to it so we can restore it by copying in the other direction. |
1711 | if (HasRedZone && HasBP) { |
1712 | BuildMI(MBB, MBBI, dl, OrInst, RBReg). |
1713 | addReg(BPReg). |
1714 | addReg(BPReg); |
1715 | } |
1716 | // If this function contained a fastcc call and GuaranteedTailCallOpt is |
1717 | // enabled (=> hasFastCall()==true) the fastcc call might contain a tail |
1718 | // call which invalidates the stack pointer value in SP(0). So we use the |
1719 | // value of R31 in this case. Similar situation exists with setjmp. |
1720 | else if (FI->hasFastCall() || MF.exposesReturnsTwice()) { |
1721 | assert(HasFP && "Expecting a valid frame pointer.")(static_cast<void> (0)); |
1722 | if (!HasRedZone) |
1723 | RBReg = FPReg; |
1724 | if (!isLargeFrame) { |
1725 | BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) |
1726 | .addReg(FPReg).addImm(FrameSize); |
1727 | } else { |
// FrameSize does not fit a 16-bit immediate: build it in ScratchReg with
// LIS + ORI and add it to FPReg.
1728 | BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) |
1729 | .addImm(FrameSize >> 16); |
1730 | BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) |
1731 | .addReg(ScratchReg, RegState::Kill) |
1732 | .addImm(FrameSize & 0xFFFF); |
1733 | BuildMI(MBB, MBBI, dl, AddInst) |
1734 | .addReg(RBReg) |
1735 | .addReg(FPReg) |
1736 | .addReg(ScratchReg); |
1737 | } |
1738 | } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { |
1739 | if (HasRedZone) { |
1740 | BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) |
1741 | .addReg(SPReg) |
1742 | .addImm(FrameSize); |
1743 | } else { |
1744 | // Make sure that adding FrameSize will not overflow the max offset |
1745 | // size. |
1746 | assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&(static_cast<void> (0)) |
1747 | "Local offsets should be negative")(static_cast<void> (0)); |
1748 | SPAdd = FrameSize; |
1749 | FPOffset += FrameSize; |
1750 | BPOffset += FrameSize; |
1751 | PBPOffset += FrameSize; |
1752 | } |
1753 | } else { |
1754 | // We don't want to use ScratchReg as a base register, because it |
1755 | // could happen to be R0. Use FP instead, but make sure to preserve it. |
1756 | if (!HasRedZone) { |
1757 | // If FP is not saved, copy it to ScratchReg. |
1758 | if (!HasFP) |
1759 | BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) |
1760 | .addReg(FPReg) |
1761 | .addReg(FPReg); |
1762 | RBReg = FPReg; |
1763 | } |
// Load the word stored at 0(SP) into RBReg.
1764 | BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) |
1765 | .addImm(0) |
1766 | .addReg(SPReg); |
1767 | } |
1768 | } |
1769 | assert(RBReg != ScratchReg && "Should have avoided ScratchReg")(static_cast<void> (0)); |
1770 | // If there is no red zone, ScratchReg may be needed for holding a useful |
1771 | // value (although not the base register). Make sure it is not overwritten |
1772 | // too early. |
1773 | |
1774 | // If we need to restore both the LR and the CR and we only have one |
1775 | // available scratch register, we must do them one at a time. |
1776 | if (MustSaveCR && SingleScratchReg && MustSaveLR) { |
1777 | // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg |
1778 | // is live here. |
1779 | assert(HasRedZone && "Expecting red zone")(static_cast<void> (0)); |
1780 | BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) |
1781 | .addImm(CRSaveOffset) |
1782 | .addReg(SPReg); |
1783 | for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) |
1784 | BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) |
1785 | .addReg(TempReg, getKillRegState(i == e-1)); |
1786 | } |
1787 | |
1788 | // Delay restoring of the LR if ScratchReg is needed. This is ok, since |
1789 | // LR is stored in the caller's stack frame. ScratchReg will be needed |
1790 | // if RBReg is anything other than SP. We shouldn't use ScratchReg as |
1791 | // a base register anyway, because it may happen to be R0. |
1792 | bool LoadedLR = false; |
1793 | if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { |
1794 | BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) |
1795 | .addImm(LROffset+SPAdd) |
1796 | .addReg(RBReg); |
1797 | LoadedLR = true; |
1798 | } |
1799 | |
// Two distinct scratch registers (or no LR restore): load the saved CR word
// into TempReg now; it is moved into the CR fields further down.
1800 | if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { |
1801 | assert(RBReg == SPReg && "Should be using SP as a base register")(static_cast<void> (0)); |
1802 | BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) |
1803 | .addImm(CRSaveOffset) |
1804 | .addReg(RBReg); |
1805 | } |
1806 | |
1807 | if (HasFP) { |
1808 | // If there is red zone, restore FP directly, since SP has already been |
1809 | // restored. Otherwise, restore the value of FP into ScratchReg. |
1810 | if (HasRedZone || RBReg == SPReg) |
1811 | BuildMI(MBB, MBBI, dl, LoadInst, FPReg) |
1812 | .addImm(FPOffset) |
1813 | .addReg(SPReg); |
1814 | else |
1815 | BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) |
1816 | .addImm(FPOffset) |
1817 | .addReg(RBReg); |
1818 | } |
1819 | |
1820 | if (FI->usesPICBase()) |
1821 | BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) |
1822 | .addImm(PBPOffset) |
1823 | .addReg(RBReg); |
1824 | |
1825 | if (HasBP) |
1826 | BuildMI(MBB, MBBI, dl, LoadInst, BPReg) |
1827 | .addImm(BPOffset) |
1828 | .addReg(RBReg); |
1829 | |
1830 | // There is nothing more to be loaded from the stack, so now we can |
1831 | // restore SP: SP = RBReg + SPAdd. |
1832 | if (RBReg != SPReg || SPAdd != 0) { |
1833 | assert(!HasRedZone && "This should not happen with red zone")(static_cast<void> (0)); |
1834 | // If SPAdd is 0, generate a copy. |
1835 | if (SPAdd == 0) |
1836 | BuildMI(MBB, MBBI, dl, OrInst, SPReg) |
1837 | .addReg(RBReg) |
1838 | .addReg(RBReg); |
1839 | else |
1840 | BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) |
1841 | .addReg(RBReg) |
1842 | .addImm(SPAdd); |
1843 | |
1844 | assert(RBReg != ScratchReg && "Should be using FP or SP as base register")(static_cast<void> (0)); |
// FP was used as the base register; move its real value (held in ScratchReg)
// back into FPReg.
1845 | if (RBReg == FPReg) |
1846 | BuildMI(MBB, MBBI, dl, OrInst, FPReg) |
1847 | .addReg(ScratchReg) |
1848 | .addReg(ScratchReg); |
1849 | |
1850 | // Now load the LR from the caller's stack frame. |
1851 | if (MustSaveLR && !LoadedLR) |
1852 | BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) |
1853 | .addImm(LROffset) |
1854 | .addReg(SPReg); |
1855 | } |
1856 | |
// Move the CR word loaded into TempReg above into each saved CR field.
1857 | if (MustSaveCR && |
1858 | !(SingleScratchReg && MustSaveLR)) |
1859 | for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) |
1860 | BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) |
1861 | .addReg(TempReg, getKillRegState(i == e-1)); |
1862 | |
1863 | if (MustSaveLR) { |
1864 | // If ROP protection is required, an extra instruction is added to compute a |
1865 | // hash and then compare it to the hash stored in the prologue. |
1866 | if (HasROPProtect) { |
1867 | const int SaveIndex = FI->getROPProtectionHashSaveIndex(); |
1868 | const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex); |
1869 | assert((ImmOffset <= -8 && ImmOffset >= -512) &&(static_cast<void> (0)) |
1870 | "ROP hash check location offset out of range.")(static_cast<void> (0)); |
1871 | assert(((ImmOffset & 0x7) == 0) &&(static_cast<void> (0)) |
1872 | "ROP hash check location offset must be 8 byte aligned.")(static_cast<void> (0)); |
1873 | BuildMI(MBB, StackUpdateLoc, dl, HashChk) |
1874 | .addReg(ScratchReg) |
1875 | .addImm(ImmOffset) |
1876 | .addReg(SPReg); |
1877 | } |
1878 | BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); |
1879 | } |
1880 | |
1881 | // Callee pop calling convention. Pop parameter/linkage area. Used for tail |
1882 | // call optimization |
1883 | if (IsReturnBlock) { |
1884 | unsigned RetOpcode = MBBI->getOpcode(); |
1885 | if (MF.getTarget().Options.GuaranteedTailCallOpt && |
1886 | (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && |
1887 | MF.getFunction().getCallingConv() == CallingConv::Fast) { |
1888 | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
1889 | unsigned CallerAllocatedAmt = FI->getMinReservedArea(); |
1890 | |
1891 | if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { |
1892 | BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) |
1893 | .addReg(SPReg).addImm(CallerAllocatedAmt); |
1894 | } else { |
// NOTE(review): this branch is also taken when CallerAllocatedAmt is 0, and
// the ADD below uses FPReg as a source while the small-immediate path above
// adds to SPReg; confirm both are intended.
1895 | BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) |
1896 | .addImm(CallerAllocatedAmt >> 16); |
1897 | BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) |
1898 | .addReg(ScratchReg, RegState::Kill) |
1899 | .addImm(CallerAllocatedAmt & 0xFFFF); |
1900 | BuildMI(MBB, MBBI, dl, AddInst) |
1901 | .addReg(SPReg) |
1902 | .addReg(FPReg) |
1903 | .addReg(ScratchReg); |
1904 | } |
1905 | } else { |
1906 | createTailCallBranchInstr(MBB); |
1907 | } |
1908 | } |
1909 | } |
1910 | |
// Emits the real tail-call branch for a TCRETURN* pseudo in MBB. The opcode of
// MBB's first terminator selects the branch: TAILB/TAILB8 for the direct
// (di) forms, TAILBCTR/TAILBCTR8 for the register (ri) forms and
// TAILBA/TAILBA8 for the absolute (ai) forms. Any other opcode leaves MBB
// unchanged.
1911 | void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { |
1912 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
1913 | |
1914 | // If we got this far a first terminator should exist. |
1915 | assert(MBBI != MBB.end() && "Failed to find the first terminator.")(static_cast<void> (0)); |
1916 | |
1917 | DebugLoc dl = MBBI->getDebugLoc(); |
1918 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
1919 | |
1920 | // Create branch instruction for pseudo tail call return instruction. |
1921 | // The TCRETURNdi variants are direct calls. Valid targets for those are |
1922 | // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel |
1923 | // since we can tail call external functions with PC-Rel (i.e. we don't need |
1924 | // to worry about different TOC pointers). Some of the external functions will |
1925 | // be MO_GlobalAddress while others like memcpy for example, are going to |
1926 | // be MO_ExternalSymbol. |
// In every case below MBBI is re-pointed at the last non-debug instruction of
// MBB; its operand 0 supplies the jump target and the new branch is inserted
// in front of it.
1927 | unsigned RetOpcode = MBBI->getOpcode(); |
1928 | if (RetOpcode == PPC::TCRETURNdi) { |
1929 | MBBI = MBB.getLastNonDebugInstr(); |
1930 | MachineOperand &JumpTarget = MBBI->getOperand(0); |
1931 | if (JumpTarget.isGlobal()) |
1932 | BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). |
1933 | addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); |
1934 | else if (JumpTarget.isSymbol()) |
1935 | BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). |
1936 | addExternalSymbol(JumpTarget.getSymbolName()); |
1937 | else |
1938 | llvm_unreachable("Expecting Global or External Symbol")__builtin_unreachable(); |
1939 | } else if (RetOpcode == PPC::TCRETURNri) { |
1940 | MBBI = MBB.getLastNonDebugInstr(); |
1941 | assert(MBBI->getOperand(0).isReg() && "Expecting register operand.")(static_cast<void> (0)); |
1942 | BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); |
1943 | } else if (RetOpcode == PPC::TCRETURNai) { |
1944 | MBBI = MBB.getLastNonDebugInstr(); |
1945 | MachineOperand &JumpTarget = MBBI->getOperand(0); |
1946 | BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); |
1947 | } else if (RetOpcode == PPC::TCRETURNdi8) { |
1948 | MBBI = MBB.getLastNonDebugInstr(); |
1949 | MachineOperand &JumpTarget = MBBI->getOperand(0); |
1950 | if (JumpTarget.isGlobal()) |
1951 | BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). |
1952 | addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); |
1953 | else if (JumpTarget.isSymbol()) |
1954 | BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). |
1955 | addExternalSymbol(JumpTarget.getSymbolName()); |
1956 | else |
1957 | llvm_unreachable("Expecting Global or External Symbol")__builtin_unreachable(); |
1958 | } else if (RetOpcode == PPC::TCRETURNri8) { |
1959 | MBBI = MBB.getLastNonDebugInstr(); |
1960 | assert(MBBI->getOperand(0).isReg() && "Expecting register operand.")(static_cast<void> (0)); |
1961 | BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); |
1962 | } else if (RetOpcode == PPC::TCRETURNai8) { |
1963 | MBBI = MBB.getLastNonDebugInstr(); |
1964 | MachineOperand &JumpTarget = MBBI->getOperand(0); |
1965 | BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); |
1966 | } |
1967 | } |
1968 | |
// Runs the generic TargetFrameLowering::determineCalleeSaves and then applies
// the PPC-specific adjustments: records whether LR must be saved, creates the
// fixed stack objects for the frame pointer, base pointer and PIC base save
// slots, the tail-call linkage area and the CR spill slot, and clears LR and
// (where applicable) R31/X31, the base register and R30 from SavedRegs.
1969 | void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, |
1970 | BitVector &SavedRegs, |
1971 | RegScavenger *RS) const { |
1972 | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); |
1973 | |
1974 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
1975 | |
1976 | // Save and clear the LR state. |
1977 | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
1978 | unsigned LR = RegInfo->getRARegister(); |
1979 | FI->setMustSaveLR(MustSaveLR(MF, LR)); |
1980 | SavedRegs.reset(LR); |
1981 | |
1982 | // Save R31 if necessary |
1983 | int FPSI = FI->getFramePointerSaveIndex(); |
1984 | const bool isPPC64 = Subtarget.isPPC64(); |
1985 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1986 | |
1987 | // If the frame pointer save index hasn't been defined yet. |
1988 | if (!FPSI && needsFP(MF)) { |
1989 | // Find out the fixed offset of the frame pointer save area. |
1990 | int FPOffset = getFramePointerSaveOffset(); |
1991 | // Allocate the frame index for frame pointer save area. |
1992 | FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); |
1993 | // Save the result. |
1994 | FI->setFramePointerSaveIndex(FPSI); |
1995 | } |
1996 | |
// Same for the base pointer: create its save slot once, if one is needed.
1997 | int BPSI = FI->getBasePointerSaveIndex(); |
1998 | if (!BPSI && RegInfo->hasBasePointer(MF)) { |
1999 | int BPOffset = getBasePointerSaveOffset(); |
2000 | // Allocate the frame index for the base pointer save area. |
2001 | BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); |
2002 | // Save the result. |
2003 | FI->setBasePointerSaveIndex(BPSI); |
2004 | } |
2005 | |
2006 | // Reserve stack space for the PIC Base register (R30). |
2007 | // Only used in SVR4 32-bit. |
2008 | if (FI->usesPICBase()) { |
2009 | int PBPSI = MFI.CreateFixedObject(4, -8, true); |
2010 | FI->setPICBasePointerSaveIndex(PBPSI); |
2011 | } |
2012 | |
2013 | // Make sure we don't explicitly spill r31, because, for example, we have |
2014 | // some inline asm which explicitly clobbers it, when we otherwise have a |
2015 | // frame pointer and are using r31's spill slot for the prologue/epilogue |
2016 | // code. Same goes for the base pointer and the PIC base register. |
2017 | if (needsFP(MF)) |
2018 | SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); |
2019 | if (RegInfo->hasBasePointer(MF)) |
2020 | SavedRegs.reset(RegInfo->getBaseRegister(MF)); |
2021 | if (FI->usesPICBase()) |
2022 | SavedRegs.reset(PPC::R30); |
2023 | |
2024 | // Reserve stack space to move the linkage area to in case of a tail call. |
2025 | int TCSPDelta = 0; |
2026 | if (MF.getTarget().Options.GuaranteedTailCallOpt && |
2027 | (TCSPDelta = FI->getTailCallSPDelta()) < 0) { |
2028 | MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); |
2029 | } |
2030 | |
2031 | // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4. |
2032 | // For 64-bit SVR4, and all flavors of AIX we create a FixedStack |
2033 | // object at the offset of the CR-save slot in the linkage area. The actual |
2034 | // save and restore of the condition register will be created as part of the |
2035 | // prologue and epilogue insertion, but the FixedStack object is needed to |
2036 | // keep the CalleeSavedInfo valid. |
2037 | if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || |
2038 | SavedRegs.test(PPC::CR4))) { |
2039 | const uint64_t SpillSize = 4; // Condition register is always 4 bytes. |
// Slot offset: 8 on 64-bit targets, 4 on 32-bit AIX, -4 otherwise.
2040 | const int64_t SpillOffset = |
2041 | Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4; |
2042 | int FrameIdx = |
2043 | MFI.CreateFixedObject(SpillSize, SpillOffset, |
2044 | /* IsImmutable */ true, /* IsAliased */ false); |
2045 | FI->setCRSpillFrameIndex(FrameIdx); |
2046 | } |
2047 | } |
2048 | |
2049 | void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, |
2050 | RegScavenger *RS) const { |
2051 | // Get callee saved register information. |
2052 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
2053 | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
2054 | |
2055 | // If the function is shrink-wrapped, and if the function has a tail call, the |
2056 | // tail call might not be in the new RestoreBlock, so real branch instruction |
2057 | // won't be generated by emitEpilogue(), because shrink-wrap has chosen new |
2058 | // RestoreBlock. So we handle this case here. |
2059 | if (MFI.getSavePoint() && MFI.hasTailCall()) { |
2060 | MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); |
2061 | for (MachineBasicBlock &MBB : MF) { |
2062 | if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) |
2063 | createTailCallBranchInstr(MBB); |
2064 | } |
2065 | } |
2066 | |
2067 | // Early exit if no callee saved registers are modified! |
2068 | if (CSI.empty() && !needsFP(MF)) { |
2069 | addScavengingSpillSlot(MF, RS); |
2070 | return; |
2071 | } |
2072 | |
2073 | unsigned MinGPR = PPC::R31; |
2074 | unsigned MinG8R = PPC::X31; |
2075 | unsigned MinFPR = PPC::F31; |
2076 | unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31; |
2077 | |
2078 | bool HasGPSaveArea = false; |
2079 | bool HasG8SaveArea = false; |
2080 | bool HasFPSaveArea = false; |
2081 | bool HasVRSaveArea = false; |
2082 | |
2083 | SmallVector<CalleeSavedInfo, 18> GPRegs; |
2084 | SmallVector<CalleeSavedInfo, 18> G8Regs; |
2085 | SmallVector<CalleeSavedInfo, 18> FPRegs; |
2086 | SmallVector<CalleeSavedInfo, 18> VRegs; |
2087 | |
2088 | for (unsigned i = 0, e = CSI.size(); i != e; ++i) { |
2089 | unsigned Reg = CSI[i].getReg(); |
2090 | assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||(static_cast<void> (0)) |
2091 | (Reg != PPC::X2 && Reg != PPC::R2)) &&(static_cast<void> (0)) |
2092 | "Not expecting to try to spill R2 in a function that must save TOC")(static_cast<void> (0)); |
2093 | if (PPC::GPRCRegClass.contains(Reg)) { |
2094 | HasGPSaveArea = true; |
2095 | |
2096 | GPRegs.push_back(CSI[i]); |
2097 | |
2098 | if (Reg < MinGPR) { |
2099 | MinGPR = Reg; |
2100 | } |
2101 | } else if (PPC::G8RCRegClass.contains(Reg)) { |
2102 | HasG8SaveArea = true; |
2103 | |
2104 | G8Regs.push_back(CSI[i]); |
2105 | |
2106 | if (Reg < MinG8R) { |
2107 | MinG8R = Reg; |
2108 | } |
2109 | } else if (PPC::F8RCRegClass.contains(Reg)) { |
2110 | HasFPSaveArea = true; |
2111 | |
2112 | FPRegs.push_back(CSI[i]); |
2113 | |
2114 | if (Reg < MinFPR) { |
2115 | MinFPR = Reg; |
2116 | } |
2117 | } else if (PPC::CRBITRCRegClass.contains(Reg) || |
2118 | PPC::CRRCRegClass.contains(Reg)) { |
2119 | ; // do nothing, as we already know whether CRs are spilled |
2120 | } else if (PPC::VRRCRegClass.contains(Reg) || |
2121 | PPC::SPERCRegClass.contains(Reg)) { |
2122 | // Altivec and SPE are mutually exclusive, but have the same stack |
2123 | // alignment requirements, so overload the save area for both cases. |
2124 | HasVRSaveArea = true; |
2125 | |
2126 | VRegs.push_back(CSI[i]); |
2127 | |
2128 | if (Reg < MinVR) { |
2129 | MinVR = Reg; |
2130 | } |
2131 | } else { |
2132 | llvm_unreachable("Unknown RegisterClass!")__builtin_unreachable(); |
2133 | } |
2134 | } |
2135 | |
2136 | PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); |
2137 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
2138 | |
2139 | int64_t LowerBound = 0; |
2140 | |
2141 | // Take into account stack space reserved for tail calls. |
2142 | int TCSPDelta = 0; |
2143 | if (MF.getTarget().Options.GuaranteedTailCallOpt && |
2144 | (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { |
2145 | LowerBound = TCSPDelta; |
2146 | } |
2147 | |
2148 | // The Floating-point register save area is right below the back chain word |
2149 | // of the previous stack frame. |
2150 | if (HasFPSaveArea) { |
2151 | for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { |
2152 | int FI = FPRegs[i].getFrameIdx(); |
2153 | |
2154 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
2155 | } |
2156 | |
2157 | LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; |
2158 | } |
2159 | |
2160 | // Check whether the frame pointer register is allocated. If so, make sure it |
2161 | // is spilled to the correct offset. |
2162 | if (needsFP(MF)) { |
2163 | int FI = PFI->getFramePointerSaveIndex(); |
2164 | assert(FI && "No Frame Pointer Save Slot!")(static_cast<void> (0)); |
2165 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
2166 | // FP is R31/X31, so no need to update MinGPR/MinG8R. |
2167 | HasGPSaveArea = true; |
2168 | } |
2169 | |
2170 | if (PFI->usesPICBase()) { |
2171 | int FI = PFI->getPICBasePointerSaveIndex(); |
2172 | assert(FI && "No PIC Base Pointer Save Slot!")(static_cast<void> (0)); |
2173 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
2174 | |
2175 | MinGPR = std::min<unsigned>(MinGPR, PPC::R30); |
2176 | HasGPSaveArea = true; |
2177 | } |
2178 | |
2179 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
2180 | if (RegInfo->hasBasePointer(MF)) { |
2181 | int FI = PFI->getBasePointerSaveIndex(); |
2182 | assert(FI && "No Base Pointer Save Slot!")(static_cast<void> (0)); |
2183 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
2184 | |
2185 | Register BP = RegInfo->getBaseRegister(MF); |
2186 | if (PPC::G8RCRegClass.contains(BP)) { |
2187 | MinG8R = std::min<unsigned>(MinG8R, BP); |
2188 | HasG8SaveArea = true; |
2189 | } else if (PPC::GPRCRegClass.contains(BP)) { |
2190 | MinGPR = std::min<unsigned>(MinGPR, BP); |
2191 | HasGPSaveArea = true; |
2192 | } |
2193 | } |
2194 | |
2195 | // General register save area starts right below the Floating-point |
2196 | // register save area. |
2197 | if (HasGPSaveArea || HasG8SaveArea) { |
2198 | // Move general register save area spill slots down, taking into account |
2199 | // the size of the Floating-point register save area. |
2200 | for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { |
2201 | if (!GPRegs[i].isSpilledToReg()) { |
2202 | int FI = GPRegs[i].getFrameIdx(); |
2203 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
2204 | } |
2205 | } |
2206 | |
2207 | // Move general register save area spill slots down, taking into account |
2208 | // the size of the Floating-point register save area. |
2209 | for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { |
2210 | if (!G8Regs[i].isSpilledToReg()) { |
2211 | int FI = G8Regs[i].getFrameIdx(); |
2212 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
2213 | } |
2214 | } |
2215 | |
2216 | unsigned MinReg = |
2217 | std::min<unsigned>(TRI->getEncodingValue(MinGPR), |
2218 | TRI->getEncodingValue(MinG8R)); |
2219 | |
2220 | const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4; |
2221 | LowerBound -= (31 - MinReg + 1) * GPRegSize; |
2222 | } |
2223 | |
2224 | // For 32-bit only, the CR save area is below the general register |
2225 | // save area. For 64-bit SVR4, the CR save area is addressed relative |
2226 | // to the stack pointer and hence does not need an adjustment here. |
2227 | // Only CR2 (the first nonvolatile spilled) has an associated frame |
2228 | // index so that we have a single uniform save area. |
2229 | if (spillsCR(MF) && Subtarget.is32BitELFABI()) { |
2230 | // Adjust the frame index of the CR spill slot. |
2231 | for (const auto &CSInfo : CSI) { |
2232 | if (CSInfo.getReg() == PPC::CR2) { |
2233 | int FI = CSInfo.getFrameIdx(); |
2234 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
2235 | break; |
2236 | } |
2237 | } |
2238 | |
2239 | LowerBound -= 4; // The CR save area is always 4 bytes long. |
2240 | } |
2241 | |
2242 | // Both Altivec and SPE have the same alignment and padding requirements |
2243 | // within the stack frame. |
2244 | if (HasVRSaveArea) { |
2245 | // Insert alignment padding, we need 16-byte alignment. Note: for positive |
2246 | // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since |
2247 | // we are using negative number here (the stack grows downward). We should |
2248 | // use formula : y = x & (~(n-1)). Where x is the size before aligning, n |
2249 | // is the alignment size ( n = 16 here) and y is the size after aligning. |
2250 | assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!")(static_cast<void> (0)); |
2251 | LowerBound &= ~(15); |
2252 | |
2253 | for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { |
2254 | int FI = VRegs[i].getFrameIdx(); |
2255 | |
2256 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
2257 | } |
2258 | } |
2259 | |
2260 | addScavengingSpillSlot(MF, RS); |
2261 | } |
2262 | |
2263 | void |
2264 | PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, |
2265 | RegScavenger *RS) const { |
2266 | // Reserve a slot closest to SP or frame pointer if we have a dynalloc or |
2267 | // a large stack, which will require scavenging a register to materialize a |
2268 | // large offset. |
2269 | |
2270 | // We need to have a scavenger spill slot for spills if the frame size is |
2271 | // large. In case there is no free register for large-offset addressing, |
2272 | // this slot is used for the necessary emergency spill. Also, we need the |
2273 | // slot for dynamic stack allocations. |
2274 | |
2275 | // The scavenger might be invoked if the frame offset does not fit into |
2276 | // the 16-bit immediate. We don't know the complete frame size here |
2277 | // because we've not yet computed callee-saved register spills or the |
2278 | // needed alignment padding. |
2279 | unsigned StackSize = determineFrameLayout(MF, true); |
2280 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
2281 | if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || |
2282 | (hasSpills(MF) && !isInt<16>(StackSize))) { |
2283 | const TargetRegisterClass &GPRC = PPC::GPRCRegClass; |
2284 | const TargetRegisterClass &G8RC = PPC::G8RCRegClass; |
2285 | const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; |
2286 | const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); |
2287 | unsigned Size = TRI.getSpillSize(RC); |
2288 | Align Alignment = TRI.getSpillAlign(RC); |
2289 | RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false)); |
2290 | |
2291 | // Might we have over-aligned allocas? |
2292 | bool HasAlVars = |
2293 | MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); |
2294 | |
2295 | // These kinds of spills might need two registers. |
2296 | if (spillsCR(MF) || HasAlVars) |
2297 | RS->addScavengingFrameIndex( |
2298 | MFI.CreateStackObject(Size, Alignment, false)); |
2299 | } |
2300 | } |
2301 | |
2302 | // This function checks if a callee saved gpr can be spilled to a volatile |
2303 | // vector register. This occurs for leaf functions when the option |
2304 | // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers |
2305 | // which were not spilled to vectors, return false so the target independent |
2306 | // code can handle them by assigning a FrameIdx to a stack slot. |
2307 | bool PPCFrameLowering::assignCalleeSavedSpillSlots( |
2308 | MachineFunction &MF, const TargetRegisterInfo *TRI, |
2309 | std::vector<CalleeSavedInfo> &CSI) const { |
2310 | |
2311 | if (CSI.empty()) |
2312 | return true; // Early exit if no callee saved registers are modified! |
2313 | |
2314 | // Early exit if cannot spill gprs to volatile vector registers. |
2315 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
2316 | if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) |
2317 | return false; |
2318 | |
2319 | // Build a BitVector of VSRs that can be used for spilling GPRs. |
2320 | BitVector BVAllocatable = TRI->getAllocatableSet(MF); |
2321 | BitVector BVCalleeSaved(TRI->getNumRegs()); |
2322 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
2323 | const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); |
2324 | for (unsigned i = 0; CSRegs[i]; ++i) |
2325 | BVCalleeSaved.set(CSRegs[i]); |
2326 | |
2327 | for (unsigned Reg : BVAllocatable.set_bits()) { |
2328 | // Set to 0 if the register is not a volatile VSX register, or if it is |
2329 | // used in the function. |
2330 | if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) || |
2331 | MF.getRegInfo().isPhysRegUsed(Reg)) |
2332 | BVAllocatable.reset(Reg); |
2333 | } |
2334 | |
2335 | bool AllSpilledToReg = true; |
2336 | unsigned LastVSRUsedForSpill = 0; |
2337 | for (auto &CS : CSI) { |
2338 | if (BVAllocatable.none()) |
2339 | return false; |
2340 | |
2341 | unsigned Reg = CS.getReg(); |
2342 | |
2343 | if (!PPC::G8RCRegClass.contains(Reg)) { |
2344 | AllSpilledToReg = false; |
2345 | continue; |
2346 | } |
2347 | |
2348 | // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs |
2349 | // into one VSR using the mtvsrdd instruction. |
2350 | if (LastVSRUsedForSpill != 0) { |
2351 | CS.setDstReg(LastVSRUsedForSpill); |
2352 | BVAllocatable.reset(LastVSRUsedForSpill); |
2353 | LastVSRUsedForSpill = 0; |
2354 | continue; |
2355 | } |
2356 | |
2357 | unsigned VolatileVFReg = BVAllocatable.find_first(); |
2358 | if (VolatileVFReg < BVAllocatable.size()) { |
2359 | CS.setDstReg(VolatileVFReg); |
2360 | LastVSRUsedForSpill = VolatileVFReg; |
2361 | } else { |
2362 | AllSpilledToReg = false; |
2363 | } |
2364 | } |
2365 | return AllSpilledToReg; |
2366 | } |
2367 | |
2368 | bool PPCFrameLowering::spillCalleeSavedRegisters( |
2369 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
2370 | ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
2371 | |
2372 | MachineFunction *MF = MBB.getParent(); |
2373 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
2374 | PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); |
2375 | bool MustSaveTOC = FI->mustSaveTOC(); |
2376 | DebugLoc DL; |
2377 | bool CRSpilled = false; |
2378 | MachineInstrBuilder CRMIB; |
2379 | BitVector Spilled(TRI->getNumRegs()); |
2380 | |
2381 | VSRContainingGPRs.clear(); |
2382 | |
2383 | // Map each VSR to GPRs to be spilled with into it. Single VSR can contain one |
2384 | // or two GPRs, so we need table to record information for later save/restore. |
2385 | llvm::for_each(CSI, [&](const CalleeSavedInfo &Info) { |
2386 | if (Info.isSpilledToReg()) { |
2387 | auto &SpilledVSR = |
2388 | VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second; |
2389 | assert(SpilledVSR.second == 0 &&(static_cast<void> (0)) |
2390 | "Can't spill more than two GPRs into VSR!")(static_cast<void> (0)); |
2391 | if (SpilledVSR.first == 0) |
2392 | SpilledVSR.first = Info.getReg(); |
2393 | else |
2394 | SpilledVSR.second = Info.getReg(); |
2395 | } |
2396 | }); |
2397 | |
2398 | for (unsigned i = 0, e = CSI.size(); i != e; ++i) { |
2399 | unsigned Reg = CSI[i].getReg(); |
2400 | |
2401 | // CR2 through CR4 are the nonvolatile CR fields. |
2402 | bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; |
2403 | |
2404 | // Add the callee-saved register as live-in; it's killed at the spill. |
2405 | // Do not do this for callee-saved registers that are live-in to the |
2406 | // function because they will already be marked live-in and this will be |
2407 | // adding it for a second time. It is an error to add the same register |
2408 | // to the set more than once. |
2409 | const MachineRegisterInfo &MRI = MF->getRegInfo(); |
2410 | bool IsLiveIn = MRI.isLiveIn(Reg); |
2411 | if (!IsLiveIn) |
2412 | MBB.addLiveIn(Reg); |
2413 | |
2414 | if (CRSpilled && IsCRField) { |
2415 | CRMIB.addReg(Reg, RegState::ImplicitKill); |
2416 | continue; |
2417 | } |
2418 | |
2419 | // The actual spill will happen in the prologue. |
2420 | if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) |
2421 | continue; |
2422 | |
2423 | // Insert the spill to the stack frame. |
2424 | if (IsCRField) { |
2425 | PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); |
2426 | if (!Subtarget.is32BitELFABI()) { |
2427 | // The actual spill will happen at the start of the prologue. |
2428 | FuncInfo->addMustSaveCR(Reg); |
2429 | } else { |
2430 | CRSpilled = true; |
2431 | FuncInfo->setSpillsCR(); |
2432 | |
2433 | // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have |
2434 | // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. |
2435 | CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) |
2436 | .addReg(Reg, RegState::ImplicitKill); |
2437 | |
2438 | MBB.insert(MI, CRMIB); |
2439 | MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) |
2440 | .addReg(PPC::R12, |
2441 | getKillRegState(true)), |
2442 | CSI[i].getFrameIdx())); |
2443 | } |
2444 | } else { |
2445 | if (CSI[i].isSpilledToReg()) { |
2446 | unsigned Dst = CSI[i].getDstReg(); |
2447 | |
2448 | if (Spilled[Dst]) |
2449 | continue; |
2450 | |
2451 | if (VSRContainingGPRs[Dst].second != 0) { |
2452 | assert(Subtarget.hasP9Vector() &&(static_cast<void> (0)) |
2453 | "mtvsrdd is unavailable on pre-P9 targets.")(static_cast<void> (0)); |
2454 | |
2455 | NumPESpillVSR += 2; |
2456 | BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst) |
2457 | .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true)) |
2458 | .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true)); |
2459 | } else if (VSRContainingGPRs[Dst].second == 0) { |
2460 | assert(Subtarget.hasP8Vector() &&(static_cast<void> (0)) |
2461 | "Can't move GPR to VSR on pre-P8 targets.")(static_cast<void> (0)); |
2462 | |
2463 | ++NumPESpillVSR; |
2464 | BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), |
2465 | TRI->getSubReg(Dst, PPC::sub_64)) |
2466 | .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true)); |
2467 | } else { |
2468 | llvm_unreachable("More than two GPRs spilled to a VSR!")__builtin_unreachable(); |
2469 | } |
2470 | Spilled.set(Dst); |
2471 | } else { |
2472 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); |
2473 | // Use !IsLiveIn for the kill flag. |
2474 | // We do not want to kill registers that are live in this function |
2475 | // before their use because they will become undefined registers. |
2476 | // Functions without NoUnwind need to preserve the order of elements in |
2477 | // saved vector registers. |
2478 | if (Subtarget.needsSwapsForVSXMemOps() && |
2479 | !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) |
2480 | TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, |
2481 | CSI[i].getFrameIdx(), RC, TRI); |
2482 | else |
2483 | TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(), |
2484 | RC, TRI); |
2485 | } |
2486 | } |
2487 | } |
2488 | return true; |
2489 | } |
2490 | |
2491 | static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, |
2492 | bool CR4Spilled, MachineBasicBlock &MBB, |
2493 | MachineBasicBlock::iterator MI, |
2494 | ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) { |
2495 | |
2496 | MachineFunction *MF = MBB.getParent(); |
2497 | const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); |
2498 | DebugLoc DL; |
2499 | unsigned MoveReg = PPC::R12; |
2500 | |
2501 | // 32-bit: FP-relative |
2502 | MBB.insert(MI, |
2503 | addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg), |
2504 | CSI[CSIIndex].getFrameIdx())); |
2505 | |
2506 | unsigned RestoreOp = PPC::MTOCRF; |
2507 | if (CR2Spilled) |
2508 | MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) |
2509 | .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); |
2510 | |
2511 | if (CR3Spilled) |
2512 | MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) |
2513 | .addReg(MoveReg, getKillRegState(!CR4Spilled))); |
2514 | |
2515 | if (CR4Spilled) |
2516 | MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) |
2517 | .addReg(MoveReg, getKillRegState(true))); |
2518 | } |
2519 | |
2520 | MachineBasicBlock::iterator PPCFrameLowering:: |
2521 | eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, |
2522 | MachineBasicBlock::iterator I) const { |
2523 | const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); |
2524 | if (MF.getTarget().Options.GuaranteedTailCallOpt && |
2525 | I->getOpcode() == PPC::ADJCALLSTACKUP) { |
2526 | // Add (actually subtract) back the amount the callee popped on return. |
2527 | if (int CalleeAmt = I->getOperand(1).getImm()) { |
2528 | bool is64Bit = Subtarget.isPPC64(); |
2529 | CalleeAmt *= -1; |
2530 | unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; |
2531 | unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; |
2532 | unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; |
2533 | unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; |
2534 | unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; |
2535 | unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; |
2536 | const DebugLoc &dl = I->getDebugLoc(); |
2537 | |
2538 | if (isInt<16>(CalleeAmt)) { |
2539 | BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) |
2540 | .addReg(StackReg, RegState::Kill) |
2541 | .addImm(CalleeAmt); |
2542 | } else { |
2543 | MachineBasicBlock::iterator MBBI = I; |
2544 | BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) |
2545 | .addImm(CalleeAmt >> 16); |
2546 | BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) |
2547 | .addReg(TmpReg, RegState::Kill) |
2548 | .addImm(CalleeAmt & 0xFFFF); |
2549 | BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) |
2550 | .addReg(StackReg, RegState::Kill) |
2551 | .addReg(TmpReg); |
2552 | } |
2553 | } |
2554 | } |
2555 | // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. |
2556 | return MBB.erase(I); |
2557 | } |
2558 | |
2559 | static bool isCalleeSavedCR(unsigned Reg) { |
2560 | return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4; |
2561 | } |
2562 | |
2563 | bool PPCFrameLowering::restoreCalleeSavedRegisters( |
2564 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
2565 | MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
2566 | MachineFunction *MF = MBB.getParent(); |
2567 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
2568 | PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); |
2569 | bool MustSaveTOC = FI->mustSaveTOC(); |
2570 | bool CR2Spilled = false; |
2571 | bool CR3Spilled = false; |
2572 | bool CR4Spilled = false; |
2573 | unsigned CSIIndex = 0; |
2574 | BitVector Restored(TRI->getNumRegs()); |
2575 | |
2576 | // Initialize insertion-point logic; we will be restoring in reverse |
2577 | // order of spill. |
2578 | MachineBasicBlock::iterator I = MI, BeforeI = I; |
2579 | bool AtStart = I == MBB.begin(); |
2580 | |
2581 | if (!AtStart) |
2582 | --BeforeI; |
2583 | |
2584 | for (unsigned i = 0, e = CSI.size(); i != e; ++i) { |
2585 | unsigned Reg = CSI[i].getReg(); |
2586 | |
2587 | if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) |
2588 | continue; |
2589 | |
2590 | // Restore of callee saved condition register field is handled during |
2591 | // epilogue insertion. |
2592 | if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI()) |
2593 | continue; |
2594 | |
2595 | if (Reg == PPC::CR2) { |
2596 | CR2Spilled = true; |
2597 | // The spill slot is associated only with CR2, which is the |
2598 | // first nonvolatile spilled. Save it here. |
2599 | CSIIndex = i; |
2600 | continue; |
2601 | } else if (Reg == PPC::CR3) { |
2602 | CR3Spilled = true; |
2603 | continue; |
2604 | } else if (Reg == PPC::CR4) { |
2605 | CR4Spilled = true; |
2606 | continue; |
2607 | } else { |
2608 | // On 32-bit ELF when we first encounter a non-CR register after seeing at |
2609 | // least one CR register, restore all spilled CRs together. |
2610 | if (CR2Spilled || CR3Spilled || CR4Spilled) { |
2611 | bool is31 = needsFP(*MF); |
2612 | restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, |
2613 | CSIIndex); |
2614 | CR2Spilled = CR3Spilled = CR4Spilled = false; |
2615 | } |
2616 | |
2617 | if (CSI[i].isSpilledToReg()) { |
2618 | DebugLoc DL; |
2619 | unsigned Dst = CSI[i].getDstReg(); |
2620 | |
2621 | if (Restored[Dst]) |
2622 | continue; |
2623 | |
2624 | if (VSRContainingGPRs[Dst].second != 0) { |
2625 | assert(Subtarget.hasP9Vector())(static_cast<void> (0)); |
2626 | NumPEReloadVSR += 2; |
2627 | BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD), |
2628 | VSRContainingGPRs[Dst].second) |
2629 | .addReg(Dst); |
2630 | BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), |
2631 | VSRContainingGPRs[Dst].first) |
2632 | .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true)); |
2633 | } else if (VSRContainingGPRs[Dst].second == 0) { |
2634 | assert(Subtarget.hasP8Vector())(static_cast<void> (0)); |
2635 | ++NumPEReloadVSR; |
2636 | BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), |
2637 | VSRContainingGPRs[Dst].first) |
2638 | .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true)); |
2639 | } else { |
2640 | llvm_unreachable("More than two GPRs spilled to a VSR!")__builtin_unreachable(); |
2641 | } |
2642 | |
2643 | Restored.set(Dst); |
2644 | |
2645 | } else { |
2646 | // Default behavior for non-CR saves. |
2647 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); |
2648 | |
2649 | // Functions without NoUnwind need to preserve the order of elements in |
2650 | // saved vector registers. |
2651 | if (Subtarget.needsSwapsForVSXMemOps() && |
2652 | !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) |
2653 | TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC, |
2654 | TRI); |
2655 | else |
2656 | TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); |
2657 | |
2658 | assert(I != MBB.begin() &&(static_cast<void> (0)) |
2659 | "loadRegFromStackSlot didn't insert any code!")(static_cast<void> (0)); |
2660 | } |
2661 | } |
2662 | |
2663 | // Insert in reverse order. |
2664 | if (AtStart) |
2665 | I = MBB.begin(); |
2666 | else { |
2667 | I = BeforeI; |
2668 | ++I; |
2669 | } |
2670 | } |
2671 | |
2672 | // If we haven't yet spilled the CRs, do so now. |
2673 | if (CR2Spilled || CR3Spilled || CR4Spilled) { |
2674 | assert(Subtarget.is32BitELFABI() &&(static_cast<void> (0)) |
2675 | "Only set CR[2|3|4]Spilled on 32-bit SVR4.")(static_cast<void> (0)); |
2676 | bool is31 = needsFP(*MF); |
2677 | restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); |
2678 | } |
2679 | |
2680 | return true; |
2681 | } |
2682 | |
2683 | uint64_t PPCFrameLowering::getTOCSaveOffset() const { |
2684 | return TOCSaveOffset; |
2685 | } |
2686 | |
2687 | uint64_t PPCFrameLowering::getFramePointerSaveOffset() const { |
2688 | return FramePointerSaveOffset; |
2689 | } |
2690 | |
2691 | uint64_t PPCFrameLowering::getBasePointerSaveOffset() const { |
2692 | return BasePointerSaveOffset; |
2693 | } |
2694 | |
2695 | bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { |
2696 | if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) |
2697 | return false; |
2698 | return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI(); |
2699 | } |