File: | lib/Target/ARM/ARMFrameLowering.cpp |
Location: | line 1710, column 9 |
Description: | Called C++ object pointer is null |
1 | //===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | // | |||
10 | // This file contains the ARM implementation of TargetFrameLowering class. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "ARMFrameLowering.h" | |||
15 | #include "ARMBaseInstrInfo.h" | |||
16 | #include "ARMBaseRegisterInfo.h" | |||
17 | #include "ARMConstantPoolValue.h" | |||
18 | #include "ARMMachineFunctionInfo.h" | |||
19 | #include "MCTargetDesc/ARMAddressingModes.h" | |||
20 | #include "llvm/CodeGen/MachineFrameInfo.h" | |||
21 | #include "llvm/CodeGen/MachineFunction.h" | |||
22 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |||
23 | #include "llvm/CodeGen/MachineModuleInfo.h" | |||
24 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
25 | #include "llvm/CodeGen/RegisterScavenging.h" | |||
26 | #include "llvm/MC/MCAsmInfo.h" | |||
27 | #include "llvm/IR/CallingConv.h" | |||
28 | #include "llvm/IR/Function.h" | |||
29 | #include "llvm/MC/MCContext.h" | |||
30 | #include "llvm/Support/CommandLine.h" | |||
31 | #include "llvm/Target/TargetOptions.h" | |||
32 | ||||
33 | using namespace llvm; | |||
34 | ||||
// Command-line flag (default: on). When enabled, NEON D-register spills in
// the prolog/epilog are kept 8-byte aligned, which allows the faster aligned
// vst1/vld1 forms to be used for them.
static cl::opt<bool>
SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
                     cl::desc("Align ARM NEON spills in prolog and epilog"));
38 | ||||
// Forward declaration: advances an iterator past the instruction sequence
// that spills the aligned DPRCS2 registers (defined later in this file; used
// by emitPrologue before its definition).
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs);
42 | ||||
// The ARM stack grows down and its alignment comes from the subtarget. The
// remaining base-class ctor arguments are 0 and 4 — presumably the local
// area offset and transient stack alignment; confirm against the
// TargetFrameLowering constructor signature.
ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
      STI(sti) {}
46 | ||||
47 | bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const { | |||
48 | // iOS always has a FP for backtracking, force other targets to keep their FP | |||
49 | // when doing FastISel. The emitted code is currently superior, and in cases | |||
50 | // like test-suite's lencod FastISel isn't quite correct when FP is eliminated. | |||
51 | return TargetFrameLowering::noFramePointerElim(MF) || | |||
52 | MF.getSubtarget<ARMSubtarget>().useFastISel(); | |||
53 | } | |||
54 | ||||
55 | /// hasFP - Return true if the specified function should have a dedicated frame | |||
56 | /// pointer register. This is true if the function has variable sized allocas | |||
57 | /// or if frame pointer elimination is disabled. | |||
58 | bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { | |||
59 | const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); | |||
60 | ||||
61 | // iOS requires FP not to be clobbered for backtracing purpose. | |||
62 | if (STI.isTargetIOS() || STI.isTargetWatchOS()) | |||
63 | return true; | |||
64 | ||||
65 | const MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
66 | // Always eliminate non-leaf frame pointers. | |||
67 | return ((MF.getTarget().Options.DisableFramePointerElim(MF) && | |||
68 | MFI->hasCalls()) || | |||
69 | RegInfo->needsStackRealignment(MF) || | |||
70 | MFI->hasVarSizedObjects() || | |||
71 | MFI->isFrameAddressTaken()); | |||
72 | } | |||
73 | ||||
74 | /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is | |||
75 | /// not required, we reserve argument space for call sites in the function | |||
76 | /// immediately on entry to the current function. This eliminates the need for | |||
77 | /// add/sub sp brackets around call sites. Returns true if the call frame is | |||
78 | /// included as part of the stack frame. | |||
79 | bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { | |||
80 | const MachineFrameInfo *FFI = MF.getFrameInfo(); | |||
81 | unsigned CFSize = FFI->getMaxCallFrameSize(); | |||
82 | // It's not always a good idea to include the call frame as part of the | |||
83 | // stack frame. ARM (especially Thumb) has small immediate offset to | |||
84 | // address the stack frame. So a large call frame can cause poor codegen | |||
85 | // and may even makes it impossible to scavenge a register. | |||
86 | if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 | |||
87 | return false; | |||
88 | ||||
89 | return !MF.getFrameInfo()->hasVarSizedObjects(); | |||
90 | } | |||
91 | ||||
92 | /// canSimplifyCallFramePseudos - If there is a reserved call frame, the | |||
93 | /// call frame pseudos can be simplified. Unlike most targets, having a FP | |||
94 | /// is not sufficient here since we still may reference some objects via SP | |||
95 | /// even when FP is available in Thumb2 mode. | |||
96 | bool | |||
97 | ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { | |||
98 | return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); | |||
99 | } | |||
100 | ||||
101 | static bool isCSRestore(MachineInstr *MI, | |||
102 | const ARMBaseInstrInfo &TII, | |||
103 | const MCPhysReg *CSRegs) { | |||
104 | // Integer spill area is handled with "pop". | |||
105 | if (isPopOpcode(MI->getOpcode())) { | |||
106 | // The first two operands are predicates. The last two are | |||
107 | // imp-def and imp-use of SP. Check everything in between. | |||
108 | for (int i = 5, e = MI->getNumOperands(); i != e; ++i) | |||
109 | if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) | |||
110 | return false; | |||
111 | return true; | |||
112 | } | |||
113 | if ((MI->getOpcode() == ARM::LDR_POST_IMM || | |||
114 | MI->getOpcode() == ARM::LDR_POST_REG || | |||
115 | MI->getOpcode() == ARM::t2LDR_POST) && | |||
116 | isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) && | |||
117 | MI->getOperand(1).getReg() == ARM::SP) | |||
118 | return true; | |||
119 | ||||
120 | return false; | |||
121 | } | |||
122 | ||||
123 | static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, | |||
124 | MachineBasicBlock::iterator &MBBI, DebugLoc dl, | |||
125 | const ARMBaseInstrInfo &TII, unsigned DestReg, | |||
126 | unsigned SrcReg, int NumBytes, | |||
127 | unsigned MIFlags = MachineInstr::NoFlags, | |||
128 | ARMCC::CondCodes Pred = ARMCC::AL, | |||
129 | unsigned PredReg = 0) { | |||
130 | if (isARM) | |||
131 | emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, | |||
132 | Pred, PredReg, TII, MIFlags); | |||
133 | else | |||
134 | emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, | |||
135 | Pred, PredReg, TII, MIFlags); | |||
136 | } | |||
137 | ||||
/// Adjust SP by NumBytes (SP = SP + NumBytes); thin wrapper around
/// emitRegPlusImmediate with SP as both source and destination.
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                         const ARMBaseInstrInfo &TII, int NumBytes,
                         unsigned MIFlags = MachineInstr::NoFlags,
                         ARMCC::CondCodes Pred = ARMCC::AL,
                         unsigned PredReg = 0) {
  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
                       MIFlags, Pred, PredReg);
}
147 | ||||
148 | static int sizeOfSPAdjustment(const MachineInstr *MI) { | |||
149 | int RegSize; | |||
150 | switch (MI->getOpcode()) { | |||
151 | case ARM::VSTMDDB_UPD: | |||
152 | RegSize = 8; | |||
153 | break; | |||
154 | case ARM::STMDB_UPD: | |||
155 | case ARM::t2STMDB_UPD: | |||
156 | RegSize = 4; | |||
157 | break; | |||
158 | case ARM::t2STR_PRE: | |||
159 | case ARM::STR_PRE_IMM: | |||
160 | return 4; | |||
161 | default: | |||
162 | llvm_unreachable("Unknown push or pop like instruction")::llvm::llvm_unreachable_internal("Unknown push or pop like instruction" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 162); | |||
163 | } | |||
164 | ||||
165 | int count = 0; | |||
166 | // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ | |||
167 | // pred) so the list starts at 4. | |||
168 | for (int i = MI->getNumOperands() - 1; i >= 4; --i) | |||
169 | count += RegSize; | |||
170 | return count; | |||
171 | } | |||
172 | ||||
173 | static bool WindowsRequiresStackProbe(const MachineFunction &MF, | |||
174 | size_t StackSizeInBytes) { | |||
175 | const MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
176 | const Function *F = MF.getFunction(); | |||
177 | unsigned StackProbeSize = (MFI->getStackProtectorIndex() > 0) ? 4080 : 4096; | |||
178 | if (F->hasFnAttribute("stack-probe-size")) | |||
179 | F->getFnAttribute("stack-probe-size") | |||
180 | .getValueAsString() | |||
181 | .getAsInteger(0, StackProbeSize); | |||
182 | return StackSizeInBytes >= StackProbeSize; | |||
183 | } | |||
184 | ||||
185 | namespace { | |||
186 | struct StackAdjustingInsts { | |||
187 | struct InstInfo { | |||
188 | MachineBasicBlock::iterator I; | |||
189 | unsigned SPAdjust; | |||
190 | bool BeforeFPSet; | |||
191 | }; | |||
192 | ||||
193 | SmallVector<InstInfo, 4> Insts; | |||
194 | ||||
195 | void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust, | |||
196 | bool BeforeFPSet = false) { | |||
197 | InstInfo Info = {I, SPAdjust, BeforeFPSet}; | |||
198 | Insts.push_back(Info); | |||
199 | } | |||
200 | ||||
201 | void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) { | |||
202 | auto Info = std::find_if(Insts.begin(), Insts.end(), | |||
203 | [&](InstInfo &Info) { return Info.I == I; }); | |||
204 | assert(Info != Insts.end() && "invalid sp adjusting instruction")((Info != Insts.end() && "invalid sp adjusting instruction" ) ? static_cast<void> (0) : __assert_fail ("Info != Insts.end() && \"invalid sp adjusting instruction\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 204, __PRETTY_FUNCTION__)); | |||
205 | Info->SPAdjust += ExtraBytes; | |||
206 | } | |||
207 | ||||
208 | void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB, | |||
209 | DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) { | |||
210 | unsigned CFAOffset = 0; | |||
211 | for (auto &Info : Insts) { | |||
212 | if (HasFP && !Info.BeforeFPSet) | |||
213 | return; | |||
214 | ||||
215 | CFAOffset -= Info.SPAdjust; | |||
216 | unsigned CFIIndex = MMI.addFrameInst( | |||
217 | MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); | |||
218 | BuildMI(MBB, std::next(Info.I), dl, | |||
219 | TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
220 | .addCFIIndex(CFIIndex) | |||
221 | .setMIFlags(MachineInstr::FrameSetup); | |||
222 | } | |||
223 | } | |||
224 | }; | |||
225 | } | |||
226 | ||||
/// Emit an instruction sequence that will align the address in
/// register Reg by zero-ing out the lower bits. For versions of the
/// architecture that support Neon, this must be done in a single
/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
/// single instruction. That function only gets called when optimizing
/// spilling of D registers on a core with the Neon instruction set
/// present.
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
                                     const TargetInstrInfo &TII,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     DebugLoc DL, const unsigned Reg,
                                     const unsigned Alignment,
                                     const bool MustBeSingleInstruction) {
  const ARMSubtarget &AST =
      static_cast<const ARMSubtarget &>(MF.getSubtarget());
  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
  // Alignment is assumed to be a power of two (countTrailingZeros below only
  // makes sense then): the mask of bits to clear is Alignment-1 and the
  // number of low bits to zero is log2(Alignment).
  const unsigned AlignMask = Alignment - 1;
  const unsigned NrBitsToZero = countTrailingZeros(Alignment);
  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
  if (!AFI->isThumbFunction()) {
    // if the BFC instruction is available, use that to zero the lower
    // bits:
    //   bfc Reg, #0, log2(Alignment)
    // otherwise use BIC, if the mask to zero the required number of bits
    // can be encoded in the bic immediate field
    //   bic Reg, Reg, Alignment-1
    // otherwise, emit
    //   lsr Reg, Reg, log2(Alignment)
    //   lsl Reg, Reg, log2(Alignment)
    if (CanUseBFC) {
      AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
                         .addReg(Reg, RegState::Kill)
                         .addImm(~AlignMask));
    } else if (AlignMask <= 255) {
      AddDefaultCC(
          AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
                             .addReg(Reg, RegState::Kill)
                             .addImm(AlignMask)));
    } else {
      assert(!MustBeSingleInstruction &&
             "Shouldn't call emitAligningInstructions demanding a single "
             "instruction to be emitted for large stack alignment for a target "
             "without BFC.");
      // Shifting right then left by the same amount clears the low
      // NrBitsToZero bits in two instructions.
      AddDefaultCC(AddDefaultPred(
          BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
              .addReg(Reg, RegState::Kill)
              .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))));
      AddDefaultCC(AddDefaultPred(
          BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
              .addReg(Reg, RegState::Kill)
              .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))));
    }
  } else {
    // Since this is only reached for Thumb-2 targets, the BFC instruction
    // should always be available.
    assert(CanUseBFC);
    AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
                       .addReg(Reg, RegState::Kill)
                       .addImm(~AlignMask));
  }
}
289 | ||||
290 | void ARMFrameLowering::emitPrologue(MachineFunction &MF, | |||
291 | MachineBasicBlock &MBB) const { | |||
292 | MachineBasicBlock::iterator MBBI = MBB.begin(); | |||
293 | MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
294 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
295 | MachineModuleInfo &MMI = MF.getMMI(); | |||
296 | MCContext &Context = MMI.getContext(); | |||
297 | const TargetMachine &TM = MF.getTarget(); | |||
298 | const MCRegisterInfo *MRI = Context.getRegisterInfo(); | |||
299 | const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo(); | |||
300 | const ARMBaseInstrInfo &TII = *STI.getInstrInfo(); | |||
301 | assert(!AFI->isThumb1OnlyFunction() &&((!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitPrologue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 302, __PRETTY_FUNCTION__)) | |||
302 | "This emitPrologue does not support Thumb1!")((!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitPrologue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 302, __PRETTY_FUNCTION__)); | |||
303 | bool isARM = !AFI->isThumbFunction(); | |||
304 | unsigned Align = STI.getFrameLowering()->getStackAlignment(); | |||
305 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); | |||
306 | unsigned NumBytes = MFI->getStackSize(); | |||
307 | const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); | |||
308 | ||||
309 | // Debug location must be unknown since the first debug location is used | |||
310 | // to determine the end of the prologue. | |||
311 | DebugLoc dl; | |||
312 | ||||
313 | unsigned FramePtr = RegInfo->getFrameRegister(MF); | |||
314 | ||||
315 | // Determine the sizes of each callee-save spill areas and record which frame | |||
316 | // belongs to which callee-save spill areas. | |||
317 | unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; | |||
318 | int FramePtrSpillFI = 0; | |||
319 | int D8SpillFI = 0; | |||
320 | ||||
321 | // All calls are tail calls in GHC calling conv, and functions have no | |||
322 | // prologue/epilogue. | |||
323 | if (MF.getFunction()->getCallingConv() == CallingConv::GHC) | |||
324 | return; | |||
325 | ||||
326 | StackAdjustingInsts DefCFAOffsetCandidates; | |||
327 | bool HasFP = hasFP(MF); | |||
328 | ||||
329 | // Allocate the vararg register save area. | |||
330 | if (ArgRegsSaveSize) { | |||
331 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, | |||
332 | MachineInstr::FrameSetup); | |||
333 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true); | |||
334 | } | |||
335 | ||||
336 | if (!AFI->hasStackFrame() && | |||
337 | (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) { | |||
338 | if (NumBytes - ArgRegsSaveSize != 0) { | |||
339 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize), | |||
340 | MachineInstr::FrameSetup); | |||
341 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), | |||
342 | NumBytes - ArgRegsSaveSize, true); | |||
343 | } | |||
344 | DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); | |||
345 | return; | |||
346 | } | |||
347 | ||||
348 | // Determine spill area sizes. | |||
349 | for (unsigned i = 0, e = CSI.size(); i != e; ++i) { | |||
350 | unsigned Reg = CSI[i].getReg(); | |||
351 | int FI = CSI[i].getFrameIdx(); | |||
352 | switch (Reg) { | |||
353 | case ARM::R8: | |||
354 | case ARM::R9: | |||
355 | case ARM::R10: | |||
356 | case ARM::R11: | |||
357 | case ARM::R12: | |||
358 | if (STI.isTargetMachO()) { | |||
359 | GPRCS2Size += 4; | |||
360 | break; | |||
361 | } | |||
362 | // fallthrough | |||
363 | case ARM::R0: | |||
364 | case ARM::R1: | |||
365 | case ARM::R2: | |||
366 | case ARM::R3: | |||
367 | case ARM::R4: | |||
368 | case ARM::R5: | |||
369 | case ARM::R6: | |||
370 | case ARM::R7: | |||
371 | case ARM::LR: | |||
372 | if (Reg == FramePtr) | |||
373 | FramePtrSpillFI = FI; | |||
374 | GPRCS1Size += 4; | |||
375 | break; | |||
376 | default: | |||
377 | // This is a DPR. Exclude the aligned DPRCS2 spills. | |||
378 | if (Reg == ARM::D8) | |||
379 | D8SpillFI = FI; | |||
380 | if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) | |||
381 | DPRCSSize += 8; | |||
382 | } | |||
383 | } | |||
384 | ||||
385 | // Move past area 1. | |||
386 | MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push; | |||
387 | if (GPRCS1Size > 0) { | |||
388 | GPRCS1Push = LastPush = MBBI++; | |||
389 | DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true); | |||
390 | } | |||
391 | ||||
392 | // Determine starting offsets of spill areas. | |||
393 | unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size; | |||
394 | unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; | |||
395 | unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U; | |||
396 | unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign; | |||
397 | unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; | |||
398 | int FramePtrOffsetInPush = 0; | |||
399 | if (HasFP) { | |||
400 | FramePtrOffsetInPush = | |||
401 | MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize; | |||
402 | AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + | |||
403 | NumBytes); | |||
404 | } | |||
405 | AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); | |||
406 | AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); | |||
407 | AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); | |||
408 | ||||
409 | // Move past area 2. | |||
410 | if (GPRCS2Size > 0) { | |||
411 | GPRCS2Push = LastPush = MBBI++; | |||
412 | DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); | |||
413 | } | |||
414 | ||||
415 | // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our | |||
416 | // .cfi_offset operations will reflect that. | |||
417 | if (DPRGapSize) { | |||
418 | assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs")((DPRGapSize == 4 && "unexpected alignment requirements for DPRs" ) ? static_cast<void> (0) : __assert_fail ("DPRGapSize == 4 && \"unexpected alignment requirements for DPRs\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 418, __PRETTY_FUNCTION__)); | |||
419 | if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize)) | |||
420 | DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize); | |||
421 | else { | |||
422 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize, | |||
423 | MachineInstr::FrameSetup); | |||
424 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize); | |||
425 | } | |||
426 | } | |||
427 | ||||
428 | // Move past area 3. | |||
429 | if (DPRCSSize > 0) { | |||
430 | // Since vpush register list cannot have gaps, there may be multiple vpush | |||
431 | // instructions in the prologue. | |||
432 | while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) { | |||
433 | DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI)); | |||
434 | LastPush = MBBI++; | |||
435 | } | |||
436 | } | |||
437 | ||||
438 | // Move past the aligned DPRCS2 area. | |||
439 | if (AFI->getNumAlignedDPRCS2Regs() > 0) { | |||
440 | MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs()); | |||
441 | // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and | |||
442 | // leaves the stack pointer pointing to the DPRCS2 area. | |||
443 | // | |||
444 | // Adjust NumBytes to represent the stack slots below the DPRCS2 area. | |||
445 | NumBytes += MFI->getObjectOffset(D8SpillFI); | |||
446 | } else | |||
447 | NumBytes = DPRCSOffset; | |||
448 | ||||
449 | if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) { | |||
450 | uint32_t NumWords = NumBytes >> 2; | |||
451 | ||||
452 | if (NumWords < 65536) | |||
453 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) | |||
454 | .addImm(NumWords) | |||
455 | .setMIFlags(MachineInstr::FrameSetup)); | |||
456 | else | |||
457 | BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4) | |||
458 | .addImm(NumWords) | |||
459 | .setMIFlags(MachineInstr::FrameSetup); | |||
460 | ||||
461 | switch (TM.getCodeModel()) { | |||
462 | case CodeModel::Small: | |||
463 | case CodeModel::Medium: | |||
464 | case CodeModel::Default: | |||
465 | case CodeModel::Kernel: | |||
466 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL)) | |||
467 | .addImm((unsigned)ARMCC::AL).addReg(0) | |||
468 | .addExternalSymbol("__chkstk") | |||
469 | .addReg(ARM::R4, RegState::Implicit) | |||
470 | .setMIFlags(MachineInstr::FrameSetup); | |||
471 | break; | |||
472 | case CodeModel::Large: | |||
473 | case CodeModel::JITDefault: | |||
474 | BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12) | |||
475 | .addExternalSymbol("__chkstk") | |||
476 | .setMIFlags(MachineInstr::FrameSetup); | |||
477 | ||||
478 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr)) | |||
479 | .addImm((unsigned)ARMCC::AL).addReg(0) | |||
480 | .addReg(ARM::R12, RegState::Kill) | |||
481 | .addReg(ARM::R4, RegState::Implicit) | |||
482 | .setMIFlags(MachineInstr::FrameSetup); | |||
483 | break; | |||
484 | } | |||
485 | ||||
486 | AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), | |||
487 | ARM::SP) | |||
488 | .addReg(ARM::SP, RegState::Kill) | |||
489 | .addReg(ARM::R4, RegState::Kill) | |||
490 | .setMIFlags(MachineInstr::FrameSetup))); | |||
491 | NumBytes = 0; | |||
492 | } | |||
493 | ||||
494 | if (NumBytes) { | |||
495 | // Adjust SP after all the callee-save spills. | |||
496 | if (AFI->getNumAlignedDPRCS2Regs() == 0 && | |||
497 | tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) | |||
498 | DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes); | |||
499 | else { | |||
500 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, | |||
501 | MachineInstr::FrameSetup); | |||
502 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes); | |||
503 | } | |||
504 | ||||
505 | if (HasFP && isARM) | |||
506 | // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 | |||
507 | // Note it's not safe to do this in Thumb2 mode because it would have | |||
508 | // taken two instructions: | |||
509 | // mov sp, r7 | |||
510 | // sub sp, #24 | |||
511 | // If an interrupt is taken between the two instructions, then sp is in | |||
512 | // an inconsistent state (pointing to the middle of callee-saved area). | |||
513 | // The interrupt handler can end up clobbering the registers. | |||
514 | AFI->setShouldRestoreSPFromFP(true); | |||
515 | } | |||
516 | ||||
517 | // Set FP to point to the stack slot that contains the previous FP. | |||
518 | // For iOS, FP is R7, which has now been stored in spill area 1. | |||
519 | // Otherwise, if this is not iOS, all the callee-saved registers go | |||
520 | // into spill area 1, including the FP in R11. In either case, it | |||
521 | // is in area one and the adjustment needs to take place just after | |||
522 | // that push. | |||
523 | if (HasFP) { | |||
524 | MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push); | |||
525 | unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push); | |||
526 | emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, | |||
527 | dl, TII, FramePtr, ARM::SP, | |||
528 | PushSize + FramePtrOffsetInPush, | |||
529 | MachineInstr::FrameSetup); | |||
530 | if (FramePtrOffsetInPush + PushSize != 0) { | |||
531 | unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( | |||
532 | nullptr, MRI->getDwarfRegNum(FramePtr, true), | |||
533 | -(ArgRegsSaveSize - FramePtrOffsetInPush))); | |||
534 | BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
535 | .addCFIIndex(CFIIndex) | |||
536 | .setMIFlags(MachineInstr::FrameSetup); | |||
537 | } else { | |||
538 | unsigned CFIIndex = | |||
539 | MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( | |||
540 | nullptr, MRI->getDwarfRegNum(FramePtr, true))); | |||
541 | BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
542 | .addCFIIndex(CFIIndex) | |||
543 | .setMIFlags(MachineInstr::FrameSetup); | |||
544 | } | |||
545 | } | |||
546 | ||||
547 | // Now that the prologue's actual instructions are finalised, we can insert | |||
548 | // the necessary DWARF cf instructions to describe the situation. Start by | |||
549 | // recording where each register ended up: | |||
550 | if (GPRCS1Size > 0) { | |||
551 | MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); | |||
552 | int CFIIndex; | |||
553 | for (const auto &Entry : CSI) { | |||
554 | unsigned Reg = Entry.getReg(); | |||
555 | int FI = Entry.getFrameIdx(); | |||
556 | switch (Reg) { | |||
557 | case ARM::R8: | |||
558 | case ARM::R9: | |||
559 | case ARM::R10: | |||
560 | case ARM::R11: | |||
561 | case ARM::R12: | |||
562 | if (STI.isTargetMachO()) | |||
563 | break; | |||
564 | // fallthrough | |||
565 | case ARM::R0: | |||
566 | case ARM::R1: | |||
567 | case ARM::R2: | |||
568 | case ARM::R3: | |||
569 | case ARM::R4: | |||
570 | case ARM::R5: | |||
571 | case ARM::R6: | |||
572 | case ARM::R7: | |||
573 | case ARM::LR: | |||
574 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( | |||
575 | nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); | |||
576 | BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
577 | .addCFIIndex(CFIIndex) | |||
578 | .setMIFlags(MachineInstr::FrameSetup); | |||
579 | break; | |||
580 | } | |||
581 | } | |||
582 | } | |||
583 | ||||
584 | if (GPRCS2Size > 0) { | |||
585 | MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); | |||
586 | for (const auto &Entry : CSI) { | |||
587 | unsigned Reg = Entry.getReg(); | |||
588 | int FI = Entry.getFrameIdx(); | |||
589 | switch (Reg) { | |||
590 | case ARM::R8: | |||
591 | case ARM::R9: | |||
592 | case ARM::R10: | |||
593 | case ARM::R11: | |||
594 | case ARM::R12: | |||
595 | if (STI.isTargetMachO()) { | |||
596 | unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); | |||
597 | unsigned Offset = MFI->getObjectOffset(FI); | |||
598 | unsigned CFIIndex = MMI.addFrameInst( | |||
599 | MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); | |||
600 | BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
601 | .addCFIIndex(CFIIndex) | |||
602 | .setMIFlags(MachineInstr::FrameSetup); | |||
603 | } | |||
604 | break; | |||
605 | } | |||
606 | } | |||
607 | } | |||
608 | ||||
609 | if (DPRCSSize > 0) { | |||
610 | // Since vpush register list cannot have gaps, there may be multiple vpush | |||
611 | // instructions in the prologue. | |||
612 | MachineBasicBlock::iterator Pos = std::next(LastPush); | |||
613 | for (const auto &Entry : CSI) { | |||
614 | unsigned Reg = Entry.getReg(); | |||
615 | int FI = Entry.getFrameIdx(); | |||
616 | if ((Reg >= ARM::D0 && Reg <= ARM::D31) && | |||
617 | (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) { | |||
618 | unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); | |||
619 | unsigned Offset = MFI->getObjectOffset(FI); | |||
620 | unsigned CFIIndex = MMI.addFrameInst( | |||
621 | MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); | |||
622 | BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
623 | .addCFIIndex(CFIIndex) | |||
624 | .setMIFlags(MachineInstr::FrameSetup); | |||
625 | } | |||
626 | } | |||
627 | } | |||
628 | ||||
629 | // Now we can emit descriptions of where the canonical frame address was | |||
630 | // throughout the process. If we have a frame pointer, it takes over the job | |||
631 | // half-way through, so only the first few .cfi_def_cfa_offset instructions | |||
632 | // actually get emitted. | |||
633 | DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); | |||
634 | ||||
635 | if (STI.isTargetELF() && hasFP(MF)) | |||
636 | MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - | |||
637 | AFI->getFramePtrSpillOffset()); | |||
638 | ||||
639 | AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); | |||
640 | AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); | |||
641 | AFI->setDPRCalleeSavedGapSize(DPRGapSize); | |||
642 | AFI->setDPRCalleeSavedAreaSize(DPRCSSize); | |||
643 | ||||
644 | // If we need dynamic stack realignment, do it here. Be paranoid and make | |||
645 | // sure if we also have VLAs, we have a base pointer for frame access. | |||
646 | // If aligned NEON registers were spilled, the stack has already been | |||
647 | // realigned. | |||
648 | if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) { | |||
649 | unsigned MaxAlign = MFI->getMaxAlignment(); | |||
650 | assert(!AFI->isThumb1OnlyFunction())((!AFI->isThumb1OnlyFunction()) ? static_cast<void> ( 0) : __assert_fail ("!AFI->isThumb1OnlyFunction()", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 650, __PRETTY_FUNCTION__)); | |||
651 | if (!AFI->isThumbFunction()) { | |||
652 | emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign, | |||
653 | false); | |||
654 | } else { | |||
655 | // We cannot use sp as source/dest register here, thus we're using r4 to | |||
656 | // perform the calculations. We're emitting the following sequence: | |||
657 | // mov r4, sp | |||
658 | // -- use emitAligningInstructions to produce best sequence to zero | |||
659 | // -- out lower bits in r4 | |||
660 | // mov sp, r4 | |||
661 | // FIXME: It will be better just to find spare register here. | |||
662 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) | |||
663 | .addReg(ARM::SP, RegState::Kill)); | |||
664 | emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign, | |||
665 | false); | |||
666 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) | |||
667 | .addReg(ARM::R4, RegState::Kill)); | |||
668 | } | |||
669 | ||||
670 | AFI->setShouldRestoreSPFromFP(true); | |||
671 | } | |||
672 | ||||
673 | // If we need a base pointer, set it up here. It's whatever the value | |||
674 | // of the stack pointer is at this point. Any variable size objects | |||
675 | // will be allocated after this, so we can still use the base pointer | |||
676 | // to reference locals. | |||
677 | // FIXME: Clarify FrameSetup flags here. | |||
678 | if (RegInfo->hasBasePointer(MF)) { | |||
679 | if (isARM) | |||
680 | BuildMI(MBB, MBBI, dl, | |||
681 | TII.get(ARM::MOVr), RegInfo->getBaseRegister()) | |||
682 | .addReg(ARM::SP) | |||
683 | .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); | |||
684 | else | |||
685 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), | |||
686 | RegInfo->getBaseRegister()) | |||
687 | .addReg(ARM::SP)); | |||
688 | } | |||
689 | ||||
690 | // If the frame has variable sized objects then the epilogue must restore | |||
691 | // the sp from fp. We can assume there's an FP here since hasFP already | |||
692 | // checks for hasVarSizedObjects. | |||
693 | if (MFI->hasVarSizedObjects()) | |||
694 | AFI->setShouldRestoreSPFromFP(true); | |||
695 | } | |||
696 | ||||
697 | void ARMFrameLowering::emitEpilogue(MachineFunction &MF, | |||
698 | MachineBasicBlock &MBB) const { | |||
699 | MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
700 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
701 | const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); | |||
702 | const ARMBaseInstrInfo &TII = | |||
703 | *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); | |||
704 | assert(!AFI->isThumb1OnlyFunction() &&((!AFI->isThumb1OnlyFunction() && "This emitEpilogue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitEpilogue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 705, __PRETTY_FUNCTION__)) | |||
705 | "This emitEpilogue does not support Thumb1!")((!AFI->isThumb1OnlyFunction() && "This emitEpilogue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitEpilogue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 705, __PRETTY_FUNCTION__)); | |||
706 | bool isARM = !AFI->isThumbFunction(); | |||
707 | ||||
708 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); | |||
709 | int NumBytes = (int)MFI->getStackSize(); | |||
710 | unsigned FramePtr = RegInfo->getFrameRegister(MF); | |||
711 | ||||
712 | // All calls are tail calls in GHC calling conv, and functions have no | |||
713 | // prologue/epilogue. | |||
714 | if (MF.getFunction()->getCallingConv() == CallingConv::GHC) | |||
715 | return; | |||
716 | ||||
717 | // First put ourselves on the first (from top) terminator instructions. | |||
718 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); | |||
719 | DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); | |||
720 | ||||
721 | if (!AFI->hasStackFrame()) { | |||
722 | if (NumBytes - ArgRegsSaveSize != 0) | |||
723 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize); | |||
724 | } else { | |||
725 | // Unwind MBBI to point to first LDR / VLDRD. | |||
726 | const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); | |||
727 | if (MBBI != MBB.begin()) { | |||
728 | do { | |||
729 | --MBBI; | |||
730 | } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); | |||
731 | if (!isCSRestore(MBBI, TII, CSRegs)) | |||
732 | ++MBBI; | |||
733 | } | |||
734 | ||||
735 | // Move SP to start of FP callee save spill area. | |||
736 | NumBytes -= (ArgRegsSaveSize + | |||
737 | AFI->getGPRCalleeSavedArea1Size() + | |||
738 | AFI->getGPRCalleeSavedArea2Size() + | |||
739 | AFI->getDPRCalleeSavedGapSize() + | |||
740 | AFI->getDPRCalleeSavedAreaSize()); | |||
741 | ||||
742 | // Reset SP based on frame pointer only if the stack frame extends beyond | |||
743 | // frame pointer stack slot or target is ELF and the function has FP. | |||
744 | if (AFI->shouldRestoreSPFromFP()) { | |||
745 | NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; | |||
746 | if (NumBytes) { | |||
747 | if (isARM) | |||
748 | emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, | |||
749 | ARMCC::AL, 0, TII); | |||
750 | else { | |||
751 | // It's not possible to restore SP from FP in a single instruction. | |||
752 | // For iOS, this looks like: | |||
753 | // mov sp, r7 | |||
754 | // sub sp, #24 | |||
755 | // This is bad, if an interrupt is taken after the mov, sp is in an | |||
756 | // inconsistent state. | |||
757 | // Use the first callee-saved register as a scratch register. | |||
758 | assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&((!MFI->getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!" ) ? static_cast<void> (0) : __assert_fail ("!MFI->getPristineRegs(MF).test(ARM::R4) && \"No scratch register to restore SP from FP!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 759, __PRETTY_FUNCTION__)) | |||
759 | "No scratch register to restore SP from FP!")((!MFI->getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!" ) ? static_cast<void> (0) : __assert_fail ("!MFI->getPristineRegs(MF).test(ARM::R4) && \"No scratch register to restore SP from FP!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 759, __PRETTY_FUNCTION__)); | |||
760 | emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, | |||
761 | ARMCC::AL, 0, TII); | |||
762 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), | |||
763 | ARM::SP) | |||
764 | .addReg(ARM::R4)); | |||
765 | } | |||
766 | } else { | |||
767 | // Thumb2 or ARM. | |||
768 | if (isARM) | |||
769 | BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) | |||
770 | .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); | |||
771 | else | |||
772 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), | |||
773 | ARM::SP) | |||
774 | .addReg(FramePtr)); | |||
775 | } | |||
776 | } else if (NumBytes && | |||
777 | !tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes)) | |||
778 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); | |||
779 | ||||
780 | // Increment past our save areas. | |||
781 | if (AFI->getDPRCalleeSavedAreaSize()) { | |||
782 | MBBI++; | |||
783 | // Since vpop register list cannot have gaps, there may be multiple vpop | |||
784 | // instructions in the epilogue. | |||
785 | while (MBBI->getOpcode() == ARM::VLDMDIA_UPD) | |||
786 | MBBI++; | |||
787 | } | |||
788 | if (AFI->getDPRCalleeSavedGapSize()) { | |||
789 | assert(AFI->getDPRCalleeSavedGapSize() == 4 &&((AFI->getDPRCalleeSavedGapSize() == 4 && "unexpected DPR alignment gap" ) ? static_cast<void> (0) : __assert_fail ("AFI->getDPRCalleeSavedGapSize() == 4 && \"unexpected DPR alignment gap\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 790, __PRETTY_FUNCTION__)) | |||
790 | "unexpected DPR alignment gap")((AFI->getDPRCalleeSavedGapSize() == 4 && "unexpected DPR alignment gap" ) ? static_cast<void> (0) : __assert_fail ("AFI->getDPRCalleeSavedGapSize() == 4 && \"unexpected DPR alignment gap\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 790, __PRETTY_FUNCTION__)); | |||
791 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize()); | |||
792 | } | |||
793 | ||||
794 | if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; | |||
795 | if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; | |||
796 | } | |||
797 | ||||
798 | if (ArgRegsSaveSize) | |||
799 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize); | |||
800 | } | |||
801 | ||||
802 | /// getFrameIndexReference - Provide a base+offset reference to an FI slot for | |||
803 | /// debug info. It's the same as what we use for resolving the code-gen | |||
804 | /// references for now. FIXME: This can go wrong when references are | |||
805 | /// SP-relative and simple call frames aren't used. | |||
806 | int | |||
807 | ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, | |||
808 | unsigned &FrameReg) const { | |||
809 | return ResolveFrameIndexReference(MF, FI, FrameReg, 0); | |||
810 | } | |||
811 | ||||
// Resolve frame index FI to a FrameReg + Offset pair, choosing between SP,
// the frame pointer, and the base pointer depending on stack realignment,
// VLAs / non-reserved call frames, and encodable offset ranges.
// Note: several branches below intentionally fall through (no return) so the
// base-pointer tail at the bottom can take over.
int
ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
                                             int FI, unsigned &FrameReg,
                                             int SPAdj) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // Rebase the MFI object offset (relative to incoming SP) onto the final SP.
  int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
  // Same slot expressed relative to where the frame pointer was spilled.
  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
  bool isFixed = MFI->isFixedObjectIndex(FI);

  FrameReg = ARM::SP;
  Offset += SPAdj;

  // SP can move around if there are allocas. We may also lose track of SP
  // when emergency spilling inside a non-reserved call frame setup.
  bool hasMovingSP = !hasReservedCallFrame(MF);

  // When dynamically realigning the stack, use the frame pointer for
  // parameters, and the stack/base pointer for locals.
  if (RegInfo->needsStackRealignment(MF)) {
    assert (hasFP(MF) && "dynamic stack realignment without a FP!")((hasFP(MF) && "dynamic stack realignment without a FP!" ) ? static_cast<void> (0) : __assert_fail ("hasFP(MF) && \"dynamic stack realignment without a FP!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 834, __PRETTY_FUNCTION__));
    if (isFixed) {
      // Fixed objects (incoming arguments) sit above the realignment point,
      // so only the FP reaches them reliably.
      FrameReg = RegInfo->getFrameRegister(MF);
      Offset = FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) &&((RegInfo->hasBasePointer(MF) && "VLAs and dynamic stack alignment, but missing base pointer!" ) ? static_cast<void> (0) : __assert_fail ("RegInfo->hasBasePointer(MF) && \"VLAs and dynamic stack alignment, but missing base pointer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 840, __PRETTY_FUNCTION__))
             "VLAs and dynamic stack alignment, but missing base pointer!")((RegInfo->hasBasePointer(MF) && "VLAs and dynamic stack alignment, but missing base pointer!" ) ? static_cast<void> (0) : __assert_fail ("RegInfo->hasBasePointer(MF) && \"VLAs and dynamic stack alignment, but missing base pointer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 840, __PRETTY_FUNCTION__));
      FrameReg = RegInfo->getBaseRegister();
    }
    return Offset;
  }

  // If there is a frame pointer, use it when we can.
  if (hasFP(MF) && AFI->hasStackFrame()) {
    // Use frame pointer to reference fixed objects. Use it for locals if
    // there are VLAs (and thus the SP isn't reliable as a base).
    if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) && "missing base pointer!")((RegInfo->hasBasePointer(MF) && "missing base pointer!" ) ? static_cast<void> (0) : __assert_fail ("RegInfo->hasBasePointer(MF) && \"missing base pointer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 854, __PRETTY_FUNCTION__));
      if (AFI->isThumb2Function()) {
        // Try to use the frame pointer if we can, else use the base pointer
        // since it's available. This is handy for the emergency spill slot, in
        // particular.
        if (FPOffset >= -255 && FPOffset < 0) {
          FrameReg = RegInfo->getFrameRegister(MF);
          return FPOffset;
        }
      }
      // Otherwise fall through to the base-pointer tail below.
    } else if (AFI->isThumb2Function()) {
      // Use  add <rd>, sp, #<imm8>
      //      ldr <rd>, [sp, #<imm8>]
      // if at all possible to save space.
      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
        return Offset;
      // In Thumb2 mode, the negative offset is very limited. Try to avoid
      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
      if (FPOffset >= -255 && FPOffset < 0) {
        FrameReg = RegInfo->getFrameRegister(MF);
        return FPOffset;
      }
    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
      // Otherwise, use SP or FP, whichever is closer to the stack slot.
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    }
  }
  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  return Offset;
}
887 | ||||
888 | void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, | |||
889 | MachineBasicBlock::iterator MI, | |||
890 | const std::vector<CalleeSavedInfo> &CSI, | |||
891 | unsigned StmOpc, unsigned StrOpc, | |||
892 | bool NoGap, | |||
893 | bool(*Func)(unsigned, bool), | |||
894 | unsigned NumAlignedDPRCS2Regs, | |||
895 | unsigned MIFlags) const { | |||
896 | MachineFunction &MF = *MBB.getParent(); | |||
897 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); | |||
898 | ||||
899 | DebugLoc DL; | |||
900 | ||||
901 | SmallVector<std::pair<unsigned,bool>, 4> Regs; | |||
902 | unsigned i = CSI.size(); | |||
903 | while (i != 0) { | |||
904 | unsigned LastReg = 0; | |||
905 | for (; i != 0; --i) { | |||
906 | unsigned Reg = CSI[i-1].getReg(); | |||
907 | if (!(Func)(Reg, STI.isTargetMachO())) continue; | |||
908 | ||||
909 | // D-registers in the aligned area DPRCS2 are NOT spilled here. | |||
910 | if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) | |||
911 | continue; | |||
912 | ||||
913 | bool isLiveIn = MF.getRegInfo().isLiveIn(Reg); | |||
914 | if (!isLiveIn) | |||
915 | MBB.addLiveIn(Reg); | |||
916 | // If NoGap is true, push consecutive registers and then leave the rest | |||
917 | // for other instructions. e.g. | |||
918 | // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11} | |||
919 | if (NoGap && LastReg && LastReg != Reg-1) | |||
920 | break; | |||
921 | LastReg = Reg; | |||
922 | // Do not set a kill flag on values that are also marked as live-in. This | |||
923 | // happens with the @llvm-returnaddress intrinsic and with arguments | |||
924 | // passed in callee saved registers. | |||
925 | // Omitting the kill flags is conservatively correct even if the live-in | |||
926 | // is not used after all. | |||
927 | Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn)); | |||
928 | } | |||
929 | ||||
930 | if (Regs.empty()) | |||
931 | continue; | |||
932 | if (Regs.size() > 1 || StrOpc== 0) { | |||
933 | MachineInstrBuilder MIB = | |||
934 | AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) | |||
935 | .addReg(ARM::SP).setMIFlags(MIFlags)); | |||
936 | for (unsigned i = 0, e = Regs.size(); i < e; ++i) | |||
937 | MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second)); | |||
938 | } else if (Regs.size() == 1) { | |||
939 | MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), | |||
940 | ARM::SP) | |||
941 | .addReg(Regs[0].first, getKillRegState(Regs[0].second)) | |||
942 | .addReg(ARM::SP).setMIFlags(MIFlags) | |||
943 | .addImm(-4); | |||
944 | AddDefaultPred(MIB); | |||
945 | } | |||
946 | Regs.clear(); | |||
947 | ||||
948 | // Put any subsequent vpush instructions before this one: they will refer to | |||
949 | // higher register numbers so need to be pushed first in order to preserve | |||
950 | // monotonicity. | |||
951 | if (MI != MBB.begin()) | |||
952 | --MI; | |||
953 | } | |||
954 | } | |||
955 | ||||
956 | void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, | |||
957 | MachineBasicBlock::iterator MI, | |||
958 | const std::vector<CalleeSavedInfo> &CSI, | |||
959 | unsigned LdmOpc, unsigned LdrOpc, | |||
960 | bool isVarArg, bool NoGap, | |||
961 | bool(*Func)(unsigned, bool), | |||
962 | unsigned NumAlignedDPRCS2Regs) const { | |||
963 | MachineFunction &MF = *MBB.getParent(); | |||
964 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); | |||
965 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
966 | DebugLoc DL; | |||
967 | bool isTailCall = false; | |||
968 | bool isInterrupt = false; | |||
969 | bool isTrap = false; | |||
970 | if (MBB.end() != MI) { | |||
971 | DL = MI->getDebugLoc(); | |||
972 | unsigned RetOpcode = MI->getOpcode(); | |||
973 | isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri); | |||
974 | isInterrupt = | |||
975 | RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR; | |||
976 | isTrap = | |||
977 | RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl || | |||
978 | RetOpcode == ARM::tTRAP; | |||
979 | } | |||
980 | ||||
981 | SmallVector<unsigned, 4> Regs; | |||
982 | unsigned i = CSI.size(); | |||
983 | while (i != 0) { | |||
984 | unsigned LastReg = 0; | |||
985 | bool DeleteRet = false; | |||
986 | for (; i != 0; --i) { | |||
987 | unsigned Reg = CSI[i-1].getReg(); | |||
988 | if (!(Func)(Reg, STI.isTargetMachO())) continue; | |||
989 | ||||
990 | // The aligned reloads from area DPRCS2 are not inserted here. | |||
991 | if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) | |||
992 | continue; | |||
993 | ||||
994 | if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && | |||
995 | !isTrap && STI.hasV5TOps()) { | |||
996 | if (MBB.succ_empty()) { | |||
997 | Reg = ARM::PC; | |||
998 | DeleteRet = true; | |||
999 | LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; | |||
1000 | } else | |||
1001 | LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; | |||
1002 | // Fold the return instruction into the LDM. | |||
1003 | } | |||
1004 | ||||
1005 | // If NoGap is true, pop consecutive registers and then leave the rest | |||
1006 | // for other instructions. e.g. | |||
1007 | // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11} | |||
1008 | if (NoGap && LastReg && LastReg != Reg-1) | |||
1009 | break; | |||
1010 | ||||
1011 | LastReg = Reg; | |||
1012 | Regs.push_back(Reg); | |||
1013 | } | |||
1014 | ||||
1015 | if (Regs.empty()) | |||
1016 | continue; | |||
1017 | if (Regs.size() > 1 || LdrOpc == 0) { | |||
1018 | MachineInstrBuilder MIB = | |||
1019 | AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) | |||
1020 | .addReg(ARM::SP)); | |||
1021 | for (unsigned i = 0, e = Regs.size(); i < e; ++i) | |||
1022 | MIB.addReg(Regs[i], getDefRegState(true)); | |||
1023 | if (DeleteRet && MI != MBB.end()) { | |||
1024 | MIB.copyImplicitOps(*MI); | |||
1025 | MI->eraseFromParent(); | |||
1026 | } | |||
1027 | MI = MIB; | |||
1028 | } else if (Regs.size() == 1) { | |||
1029 | // If we adjusted the reg to PC from LR above, switch it back here. We | |||
1030 | // only do that for LDM. | |||
1031 | if (Regs[0] == ARM::PC) | |||
1032 | Regs[0] = ARM::LR; | |||
1033 | MachineInstrBuilder MIB = | |||
1034 | BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0]) | |||
1035 | .addReg(ARM::SP, RegState::Define) | |||
1036 | .addReg(ARM::SP); | |||
1037 | // ARM mode needs an extra reg0 here due to addrmode2. Will go away once | |||
1038 | // that refactoring is complete (eventually). | |||
1039 | if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) { | |||
1040 | MIB.addReg(0); | |||
1041 | MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift)); | |||
1042 | } else | |||
1043 | MIB.addImm(4); | |||
1044 | AddDefaultPred(MIB); | |||
1045 | } | |||
1046 | Regs.clear(); | |||
1047 | ||||
1048 | // Put any subsequent vpop instructions after this one: they will refer to | |||
1049 | // higher register numbers so need to be popped afterwards. | |||
1050 | if (MI != MBB.end()) | |||
1051 | ++MI; | |||
1052 | } | |||
1053 | } | |||
1054 | ||||
/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
///
/// NOTE: skipAlignedDPRCS2Spills depends on the exact number of instructions
/// emitted here (3 for the realignment sequence plus one store per group);
/// keep the two functions in sync.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    const std::vector<CalleeSavedInfo> &CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = *MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned. Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    // Non-D8+ CSI entries make this subtraction wrap to a huge unsigned
    // value, so the range check below also filters them out.
    unsigned DNum = CSI[i].getReg() - ARM::D8;
    if (DNum > NumAlignedDPRCS2Regs - 1)
      continue;
    int FI = CSI[i].getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer.  MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer which is a
    // bit weird when realigning the stack.  Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1")((!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"Can't realign stack for thumb1\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 1099, __PRETTY_FUNCTION__));
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
                                  .addReg(ARM::SP)
                                  .addImm(8 * NumAlignedDPRCS2Regs)));

  unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
  // We must set parameter MustBeSingleInstruction to true, since
  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
  // stack alignment. Luckily, this can always be done since all ARM
  // architecture versions that support Neon also support the BFC
  // instruction.
  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
                                .addReg(ARM::R4);
  MIB = AddDefaultPred(MIB);
  if (!isThumb)
    AddDefaultCC(MIB);

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
                           ARM::R4)
                       .addReg(ARM::R4, RegState::Kill).addImm(16)
                       .addReg(NextReg)
                       .addReg(SupReg, RegState::ImplicitKill));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
                       .addReg(ARM::R4).addImm(16).addReg(NextReg)
                       .addReg(SupReg, RegState::ImplicitKill));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
                       .addReg(ARM::R4).addImm(16).addReg(SupReg));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
                       .addReg(NextReg)
                       .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}
1186 | ||||
/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
/// iterator to the following instruction.
///
/// The number of instructions skipped is tied to exactly what
/// emitAlignedDPRCS2Spills emits: 3 alignment instructions, then one store
/// per register group. Keep the two functions in sync.
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs) {
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  ++MI; ++MI; ++MI;
  assert(MI->mayStore() && "Expecting spill instruction")((MI->mayStore() && "Expecting spill instruction") ? static_cast<void> (0) : __assert_fail ("MI->mayStore() && \"Expecting spill instruction\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 1196, __PRETTY_FUNCTION__));

  // These switches all fall through.
  // 7 regs: vst1.64 x2 + vstr; 5/6 regs: vst1.64 (+ second store); fewer
  // regs take only the later cases.
  switch(NumAlignedDPRCS2Regs) {
  case 7:
    // Skip the writeback vst1.64 covering d8-d11.
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction")((MI->mayStore() && "Expecting spill instruction") ? static_cast<void> (0) : __assert_fail ("MI->mayStore() && \"Expecting spill instruction\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 1202, __PRETTY_FUNCTION__));
  default:
    // Skip one more store (the second vst1.64 or the odd vstr.64).
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction")((MI->mayStore() && "Expecting spill instruction") ? static_cast<void> (0) : __assert_fail ("MI->mayStore() && \"Expecting spill instruction\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 1205, __PRETTY_FUNCTION__));
  case 1:
  case 2:
  case 4:
    // 1/2/4 regs need a single store, which must kill the r4 scratch.
    assert(MI->killsRegister(ARM::R4) && "Missed kill flag")((MI->killsRegister(ARM::R4) && "Missed kill flag" ) ? static_cast<void> (0) : __assert_fail ("MI->killsRegister(ARM::R4) && \"Missed kill flag\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 1209, __PRETTY_FUNCTION__));
    ++MI;
  }
  return MI;
}
1214 | ||||
/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. These instructions are assumed to execute while the
/// stack is still aligned, unlike the code inserted by emitPopInst.
///
/// \param MI insertion point (start of the epilogue sequence).
/// \param NumAlignedDPRCS2Regs number of contiguous d-registers (d8..) that
///        were spilled to the aligned area and must be reloaded here.
/// \param CSI callee-saved info; scanned to locate the frame index of d8.
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned NumAlignedDPRCS2Regs,
                                      const std::vector<CalleeSavedInfo> &CSI,
                                      const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Find the frame index assigned to d8.
  int D8SpillFI = 0;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
    if (CSI[i].getReg() == ARM::D8) {
      D8SpillFI = CSI[i].getFrameIdx();
      break;
    }

  // Materialize the address of the d8 spill slot into the scratch register r4.
  // This can be fairly complicated if the stack frame is large, so just use
  // the normal frame index elimination mechanism to do it. This code runs as
  // the initial part of the epilog where the stack and base pointers haven't
  // been changed yet.
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");

  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
                              .addFrameIndex(D8SpillFI).addImm(0)));

  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  // Writeback advances r4 past the first quad so the remaining loads use a
  // fixed base.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
                   .addReg(ARM::R4, RegState::Define)
                   .addReg(ARM::R4, RegState::Kill).addImm(16)
                   .addReg(SupReg, RegState::ImplicitDefine));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
                   .addReg(ARM::R4).addImm(16)
                   .addReg(SupReg, RegState::ImplicitDefine));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
                   .addReg(ARM::R4).addImm(16));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
  // NOTE(review): the offset 2*(NextReg-R4BaseReg) presumably encodes the
  // byte distance in the load's scaled-immediate units — confirm against the
  // VLDRD addressing mode before touching this.
  if (NumAlignedDPRCS2Regs)
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
                   .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));

  // Last store kills r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}
1296 | ||||
/// Insert spill code for the callee-saved registers in CSI.
/// Areas 1 and 2 (GPRs) are pushed with (t2)STMDB_UPD, area 3 (d-registers)
/// with VSTMDDB_UPD; registers assigned to the aligned DPRCS2 area are
/// excluded from the VSTM and spilled separately after stack realignment.
/// Returns true to tell PEI the target handled the spills itself.
bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
  unsigned PushOneOpc = AFI->isThumbFunction() ?
    ARM::t2STR_PRE : ARM::STR_PRE_IMM;
  unsigned FltOpc = ARM::VSTMDDB_UPD;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  // Push order: area 1, area 2, then the float area. The last argument of the
  // third call tells emitPushInst to skip the aligned DPRCS2 registers.
  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
               MachineInstr::FrameSetup);
  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
               MachineInstr::FrameSetup);
  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
               NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);

  // The code above does not insert spill code for the aligned DPRCS2 registers.
  // The stack realignment code will be inserted between the push instructions
  // and these spills.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  return true;
}
1327 | ||||
/// Insert reload code for the callee-saved registers in CSI, mirroring
/// spillCalleeSavedRegisters in reverse: aligned DPRCS2 reloads first (while
/// the stack is still aligned), then area 3, area 2, area 1 pops.
/// Returns true to tell PEI the target handled the restores itself.
bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();

  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
  // registers. Do that here instead.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
  unsigned FltOpc = ARM::VLDMDIA_UPD;
  // Pop in the reverse order of the pushes: float area first, then the two
  // GPR areas. The aligned DPRCS2 registers are again skipped by the first
  // call since they were restored above.
  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
              NumAlignedDPRCS2Regs);
  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
              &isARMArea2Register, 0);
  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
              &isARMArea1Register, 0);

  return true;
}
1357 | ||||
1358 | // FIXME: Make generic? | |||
1359 | static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, | |||
1360 | const ARMBaseInstrInfo &TII) { | |||
1361 | unsigned FnSize = 0; | |||
1362 | for (auto &MBB : MF) { | |||
1363 | for (auto &MI : MBB) | |||
1364 | FnSize += TII.GetInstSizeInBytes(&MI); | |||
1365 | } | |||
1366 | return FnSize; | |||
1367 | } | |||
1368 | ||||
/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
// FIXME: Move to TII?
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
                                         const TargetFrameLowering *TFI) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // Start optimistic with a 12-bit immediate range and shrink it as more
  // restrictive addressing modes are seen referencing frame indices.
  unsigned Limit = (1 << 12) - 1;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        if (!MI.getOperand(i).isFI())
          continue;

        // When using ADDri to get the address of a stack object, 255 is the
        // largest offset guaranteed to fit in the immediate offset.
        if (MI.getOpcode() == ARM::ADDri) {
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        }

        // Otherwise check the addressing mode.
        switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
        case ARMII::AddrMode3:
        case ARMII::AddrModeT2_i8:
          // 8-bit immediate.
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode5:
        case ARMII::AddrModeT2_i8s4:
          // 8-bit immediate scaled by 4.
          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
          break;
        case ARMII::AddrModeT2_i12:
          // i12 supports only positive offset so these will be converted to
          // i8 opcodes. See llvm::rewriteT2FrameIndex.
          if (TFI->hasFP(MF) && AFI->hasStackFrame())
            Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode4:
        case ARMII::AddrMode6:
          // Addressing modes 4 & 6 (load/store) instructions can't encode an
          // immediate offset for stack references.
          return 0;
        default:
          break;
        }
        break; // At most one FI per instruction
      }
    }
  }

  return Limit;
}
1421 | ||||
// In functions that realign the stack, it can be an advantage to spill the
// callee-saved vector registers after realigning the stack. The vst1 and vld1
// instructions take alignment hints that can improve performance.
//
// Decides how many d-registers (starting at d8) should be spilled to the
// aligned DPRCS2 area, records the count in ARMFunctionInfo, and reserves r4
// as the scratch register the aligned spill/reload code needs. Sets the count
// to 0 whenever any precondition fails.
static void
checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
  // Default to no aligned spills; every early return below leaves it at 0.
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
  if (!SpillAlignedNEONRegs)
    return;

  // Naked functions don't spill callee-saved registers.
  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
    return;

  // We are planning to use NEON instructions vst1 / vld1.
  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
    return;

  // Don't bother if the default stack alignment is sufficiently high.
  if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
    return;

  // Aligned spills require stack realignment.
  if (!static_cast<const ARMBaseRegisterInfo *>(
          MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
    return;

  // We always spill contiguous d-registers starting from d8. Count how many
  // needs spilling. The register allocator will almost always use the
  // callee-saved registers in order, but it can happen that there are holes in
  // the range. Registers above the hole will be spilled to the standard DPRCS
  // area.
  unsigned NumSpills = 0;
  for (; NumSpills < 8; ++NumSpills)
    if (!SavedRegs.test(ARM::D8 + NumSpills))
      break;

  // Don't do this for just one d-register. It's not worth it.
  if (NumSpills < 2)
    return;

  // Spill the first NumSpills D-registers after realigning the stack.
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);

  // A scratch register is required for the vst1 / vld1 instructions.
  SavedRegs.set(ARM::R4);
}
1469 | ||||
1470 | void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, | |||
1471 | BitVector &SavedRegs, | |||
1472 | RegScavenger *RS) const { | |||
1473 | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); | |||
1474 | // This tells PEI to spill the FP as if it is any other callee-save register | |||
1475 | // to take advantage the eliminateFrameIndex machinery. This also ensures it | |||
1476 | // is spilled in the order specified by getCalleeSavedRegs() to make it easier | |||
1477 | // to combine multiple loads / stores. | |||
1478 | bool CanEliminateFrame = true; | |||
1479 | bool CS1Spilled = false; | |||
1480 | bool LRSpilled = false; | |||
1481 | unsigned NumGPRSpills = 0; | |||
1482 | SmallVector<unsigned, 4> UnspilledCS1GPRs; | |||
1483 | SmallVector<unsigned, 4> UnspilledCS2GPRs; | |||
1484 | const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( | |||
1485 | MF.getSubtarget().getRegisterInfo()); | |||
1486 | const ARMBaseInstrInfo &TII = | |||
1487 | *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); | |||
1488 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
1489 | MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
1490 | MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
1491 | unsigned FramePtr = RegInfo->getFrameRegister(MF); | |||
1492 | ||||
1493 | // Spill R4 if Thumb2 function requires stack realignment - it will be used as | |||
1494 | // scratch register. Also spill R4 if Thumb2 function has varsized objects, | |||
1495 | // since it's not always possible to restore sp from fp in a single | |||
1496 | // instruction. | |||
1497 | // FIXME: It will be better just to find spare register here. | |||
1498 | if (AFI->isThumb2Function() && | |||
1499 | (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) | |||
1500 | SavedRegs.set(ARM::R4); | |||
1501 | ||||
1502 | if (AFI->isThumb1OnlyFunction()) { | |||
| ||||
1503 | // Spill LR if Thumb1 function uses variable length argument lists. | |||
1504 | if (AFI->getArgRegsSaveSize() > 0) | |||
1505 | SavedRegs.set(ARM::LR); | |||
1506 | ||||
1507 | // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know | |||
1508 | // for sure what the stack size will be, but for this, an estimate is good | |||
1509 | // enough. If there anything changes it, it'll be a spill, which implies | |||
1510 | // we've used all the registers and so R4 is already used, so not marking | |||
1511 | // it here will be OK. | |||
1512 | // FIXME: It will be better just to find spare register here. | |||
1513 | unsigned StackSize = MFI->estimateStackSize(MF); | |||
1514 | if (MFI->hasVarSizedObjects() || StackSize > 508) | |||
1515 | SavedRegs.set(ARM::R4); | |||
1516 | } | |||
1517 | ||||
1518 | // See if we can spill vector registers to aligned stack. | |||
1519 | checkNumAlignedDPRCS2Regs(MF, SavedRegs); | |||
1520 | ||||
1521 | // Spill the BasePtr if it's used. | |||
1522 | if (RegInfo->hasBasePointer(MF)) | |||
1523 | SavedRegs.set(RegInfo->getBaseRegister()); | |||
1524 | ||||
1525 | // Don't spill FP if the frame can be eliminated. This is determined | |||
1526 | // by scanning the callee-save registers to see if any is modified. | |||
1527 | const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); | |||
1528 | for (unsigned i = 0; CSRegs[i]; ++i) { | |||
1529 | unsigned Reg = CSRegs[i]; | |||
1530 | bool Spilled = false; | |||
1531 | if (SavedRegs.test(Reg)) { | |||
1532 | Spilled = true; | |||
1533 | CanEliminateFrame = false; | |||
1534 | } | |||
1535 | ||||
1536 | if (!ARM::GPRRegClass.contains(Reg)) | |||
1537 | continue; | |||
1538 | ||||
1539 | if (Spilled) { | |||
1540 | NumGPRSpills++; | |||
1541 | ||||
1542 | if (!STI.isTargetMachO()) { | |||
1543 | if (Reg == ARM::LR) | |||
1544 | LRSpilled = true; | |||
1545 | CS1Spilled = true; | |||
1546 | continue; | |||
1547 | } | |||
1548 | ||||
1549 | // Keep track if LR and any of R4, R5, R6, and R7 is spilled. | |||
1550 | switch (Reg) { | |||
1551 | case ARM::LR: | |||
1552 | LRSpilled = true; | |||
1553 | // Fallthrough | |||
1554 | case ARM::R0: case ARM::R1: | |||
1555 | case ARM::R2: case ARM::R3: | |||
1556 | case ARM::R4: case ARM::R5: | |||
1557 | case ARM::R6: case ARM::R7: | |||
1558 | CS1Spilled = true; | |||
1559 | break; | |||
1560 | default: | |||
1561 | break; | |||
1562 | } | |||
1563 | } else { | |||
1564 | if (!STI.isTargetMachO()) { | |||
1565 | UnspilledCS1GPRs.push_back(Reg); | |||
1566 | continue; | |||
1567 | } | |||
1568 | ||||
1569 | switch (Reg) { | |||
1570 | case ARM::R0: case ARM::R1: | |||
1571 | case ARM::R2: case ARM::R3: | |||
1572 | case ARM::R4: case ARM::R5: | |||
1573 | case ARM::R6: case ARM::R7: | |||
1574 | case ARM::LR: | |||
1575 | UnspilledCS1GPRs.push_back(Reg); | |||
1576 | break; | |||
1577 | default: | |||
1578 | UnspilledCS2GPRs.push_back(Reg); | |||
1579 | break; | |||
1580 | } | |||
1581 | } | |||
1582 | } | |||
1583 | ||||
1584 | bool ForceLRSpill = false; | |||
1585 | if (!LRSpilled && AFI->isThumb1OnlyFunction()) { | |||
1586 | unsigned FnSize = GetFunctionSizeInBytes(MF, TII); | |||
1587 | // Force LR to be spilled if the Thumb function size is > 2048. This enables | |||
1588 | // use of BL to implement far jump. If it turns out that it's not needed | |||
1589 | // then the branch fix up path will undo it. | |||
1590 | if (FnSize >= (1 << 11)) { | |||
1591 | CanEliminateFrame = false; | |||
1592 | ForceLRSpill = true; | |||
1593 | } | |||
1594 | } | |||
1595 | ||||
1596 | // If any of the stack slot references may be out of range of an immediate | |||
1597 | // offset, make sure a register (or a spill slot) is available for the | |||
1598 | // register scavenger. Note that if we're indexing off the frame pointer, the | |||
1599 | // effective stack size is 4 bytes larger since the FP points to the stack | |||
1600 | // slot of the previous FP. Also, if we have variable sized objects in the | |||
1601 | // function, stack slot references will often be negative, and some of | |||
1602 | // our instructions are positive-offset only, so conservatively consider | |||
1603 | // that case to want a spill slot (or register) as well. Similarly, if | |||
1604 | // the function adjusts the stack pointer during execution and the | |||
1605 | // adjustments aren't already part of our stack size estimate, our offset | |||
1606 | // calculations may be off, so be conservative. | |||
1607 | // FIXME: We could add logic to be more precise about negative offsets | |||
1608 | // and which instructions will need a scratch register for them. Is it | |||
1609 | // worth the effort and added fragility? | |||
1610 | bool BigStack = (RS && (MFI->estimateStackSize(MF) + | |||
1611 | ((hasFP(MF) && AFI->hasStackFrame()) ? 4 : 0) >= | |||
1612 | estimateRSStackSizeLimit(MF, this))) || | |||
1613 | MFI->hasVarSizedObjects() || | |||
1614 | (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); | |||
1615 | ||||
1616 | bool ExtraCSSpill = false; | |||
1617 | if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { | |||
1618 | AFI->setHasStackFrame(true); | |||
1619 | ||||
1620 | // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. | |||
1621 | // Spill LR as well so we can fold BX_RET to the registers restore (LDM). | |||
1622 | if (!LRSpilled && CS1Spilled) { | |||
1623 | SavedRegs.set(ARM::LR); | |||
1624 | NumGPRSpills++; | |||
1625 | SmallVectorImpl<unsigned>::iterator LRPos; | |||
1626 | LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), | |||
1627 | (unsigned)ARM::LR); | |||
1628 | if (LRPos != UnspilledCS1GPRs.end()) | |||
1629 | UnspilledCS1GPRs.erase(LRPos); | |||
1630 | ||||
1631 | ForceLRSpill = false; | |||
1632 | ExtraCSSpill = true; | |||
1633 | } | |||
1634 | ||||
1635 | if (hasFP(MF)) { | |||
1636 | SavedRegs.set(FramePtr); | |||
1637 | auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), | |||
1638 | FramePtr); | |||
1639 | if (FPPos != UnspilledCS1GPRs.end()) | |||
1640 | UnspilledCS1GPRs.erase(FPPos); | |||
1641 | NumGPRSpills++; | |||
1642 | } | |||
1643 | ||||
1644 | // If stack and double are 8-byte aligned and we are spilling an odd number | |||
1645 | // of GPRs, spill one extra callee save GPR so we won't have to pad between | |||
1646 | // the integer and double callee save areas. | |||
1647 | unsigned TargetAlign = getStackAlignment(); | |||
1648 | if (TargetAlign >= 8 && (NumGPRSpills & 1)) { | |||
1649 | if (CS1Spilled && !UnspilledCS1GPRs.empty()) { | |||
1650 | for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { | |||
1651 | unsigned Reg = UnspilledCS1GPRs[i]; | |||
1652 | // Don't spill high register if the function is thumb. In the case of | |||
1653 | // Windows on ARM, accept R11 (frame pointer) | |||
1654 | if (!AFI->isThumbFunction() || | |||
1655 | (STI.isTargetWindows() && Reg == ARM::R11) || | |||
1656 | isARMLowRegister(Reg) || Reg == ARM::LR) { | |||
1657 | SavedRegs.set(Reg); | |||
1658 | if (!MRI.isReserved(Reg)) | |||
1659 | ExtraCSSpill = true; | |||
1660 | break; | |||
1661 | } | |||
1662 | } | |||
1663 | } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { | |||
1664 | unsigned Reg = UnspilledCS2GPRs.front(); | |||
1665 | SavedRegs.set(Reg); | |||
1666 | if (!MRI.isReserved(Reg)) | |||
1667 | ExtraCSSpill = true; | |||
1668 | } | |||
1669 | } | |||
1670 | ||||
1671 | // Estimate if we might need to scavenge a register at some point in order | |||
1672 | // to materialize a stack offset. If so, either spill one additional | |||
1673 | // callee-saved register or reserve a special spill slot to facilitate | |||
1674 | // register scavenging. Thumb1 needs a spill slot for stack pointer | |||
1675 | // adjustments also, even when the frame itself is small. | |||
1676 | if (BigStack && !ExtraCSSpill) { | |||
1677 | // If any non-reserved CS register isn't spilled, just spill one or two | |||
1678 | // extra. That should take care of it! | |||
1679 | unsigned NumExtras = TargetAlign / 4; | |||
1680 | SmallVector<unsigned, 2> Extras; | |||
1681 | while (NumExtras && !UnspilledCS1GPRs.empty()) { | |||
1682 | unsigned Reg = UnspilledCS1GPRs.back(); | |||
1683 | UnspilledCS1GPRs.pop_back(); | |||
1684 | if (!MRI.isReserved(Reg) && | |||
1685 | (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || | |||
1686 | Reg == ARM::LR)) { | |||
1687 | Extras.push_back(Reg); | |||
1688 | NumExtras--; | |||
1689 | } | |||
1690 | } | |||
1691 | // For non-Thumb1 functions, also check for hi-reg CS registers | |||
1692 | if (!AFI->isThumb1OnlyFunction()) { | |||
1693 | while (NumExtras && !UnspilledCS2GPRs.empty()) { | |||
1694 | unsigned Reg = UnspilledCS2GPRs.back(); | |||
1695 | UnspilledCS2GPRs.pop_back(); | |||
1696 | if (!MRI.isReserved(Reg)) { | |||
1697 | Extras.push_back(Reg); | |||
1698 | NumExtras--; | |||
1699 | } | |||
1700 | } | |||
1701 | } | |||
1702 | if (Extras.size() && NumExtras == 0) { | |||
1703 | for (unsigned i = 0, e = Extras.size(); i != e; ++i) { | |||
1704 | SavedRegs.set(Extras[i]); | |||
1705 | } | |||
1706 | } else if (!AFI->isThumb1OnlyFunction()) { | |||
1707 | // note: Thumb1 functions spill to R12, not the stack. Reserve a slot | |||
1708 | // closest to SP or frame pointer. | |||
1709 | const TargetRegisterClass *RC = &ARM::GPRRegClass; | |||
1710 | RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), | |||
| ||||
1711 | RC->getAlignment(), | |||
1712 | false)); | |||
1713 | } | |||
1714 | } | |||
1715 | } | |||
1716 | ||||
1717 | if (ForceLRSpill) { | |||
1718 | SavedRegs.set(ARM::LR); | |||
1719 | AFI->setLRIsSpilledForFarJump(true); | |||
1720 | } | |||
1721 | } | |||
1722 | ||||
/// Replace an ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo with real SP-adjusting
/// instructions when the call frame is not reserved, then erase the pseudo.
/// Returns an iterator to the instruction following the erased pseudo.
MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  if (!hasReservedCallFrame(MF)) {
    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
    // ADJCALLSTACKUP   -> add, sp, sp, amount
    MachineInstr *Old = I;
    DebugLoc dl = Old->getDebugLoc();
    // Operand 0 of both pseudos holds the adjustment amount.
    unsigned Amount = Old->getOperand(0).getImm();
    if (Amount != 0) {
      // We need to keep the stack aligned properly. To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = alignSPAdjust(Amount);

      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
      assert(!AFI->isThumb1OnlyFunction() &&
             "This eliminateCallFramePseudoInstr does not support Thumb1!");
      bool isARM = !AFI->isThumbFunction();

      // Replace the pseudo instruction with a new instruction...
      // Carry over any predicate the pseudo had.
      unsigned Opc = Old->getOpcode();
      int PIdx = Old->findFirstPredOperandIdx();
      ARMCC::CondCodes Pred = (PIdx == -1)
        ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
        unsigned PredReg = Old->getOperand(2).getReg();
        emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        // Note: PredReg is operand 3 for ADJCALLSTACKUP.
        unsigned PredReg = Old->getOperand(3).getReg();
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  }
  // The pseudo is erased whether or not an SP update was emitted.
  return MBB.erase(I);
}
1767 | ||||
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
static uint32_t alignToARMConstant(uint32_t Value) {
  if (Value == 0)
    return 0;

  // Shift left two bits at a time until the top two bits are occupied; this
  // records how far the value was normalized.
  unsigned ShiftAmt = 0;
  for (; (Value & 0xC0000000) == 0; ShiftAmt += 2)
    Value = Value << 2;

  // Keep the top 8 bits as the payload, rounding up whenever any of the
  // lower 24 bits would be dropped.
  const bool RoundUp = (Value & 0x00FFFFFF) != 0;
  uint32_t Payload = (Value >> 24) + (RoundUp ? 1 : 0);

  // If rounding overflowed the 8-bit payload, clear the low bits so it stays
  // representable after the final shift.
  if (Payload & 0x0000100)
    Payload = Payload & 0x000001FC;

  // Undo the normalization, repositioning the payload where the original
  // magnitude lived.
  if (ShiftAmt > 24)
    return Payload >> (ShiftAmt - 24);
  return Payload << (24 - ShiftAmt);
}
1796 | ||||
// The stack limit in the TCB is set to this many bytes above the actual
// stack limit.
// NOTE(review): presumably consumed by the segmented-stack prologue logic
// (adjustForSegmentedStacks) — its use site is beyond this chunk; confirm.
static const uint64_t kSplitStackAvailable = 256;
1800 | ||||
1801 | // Adjust the function prologue to enable split stacks. This currently only | |||
1802 | // supports android and linux. | |||
1803 | // | |||
1804 | // The ABI of the segmented stack prologue is a little arbitrarily chosen, but | |||
1805 | // must be well defined in order to allow for consistent implementations of the | |||
1806 | // __morestack helper function. The ABI is also not a normal ABI in that it | |||
1807 | // doesn't follow the normal calling conventions because this allows the | |||
1808 | // prologue of each function to be optimized further. | |||
1809 | // | |||
1810 | // Currently, the ABI looks like (when calling __morestack) | |||
1811 | // | |||
1812 | // * r4 holds the minimum stack size requested for this function call | |||
1813 | // * r5 holds the stack size of the arguments to the function | |||
1814 | // * the beginning of the function is 3 instructions after the call to | |||
1815 | // __morestack | |||
1816 | // | |||
1817 | // Implementations of __morestack should use r4 to allocate a new stack, r5 to | |||
1818 | // place the arguments on to the new stack, and the 3-instruction knowledge to | |||
1819 | // jump directly to the body of the function when working on the new stack. | |||
1820 | // | |||
1821 | // An old (and possibly no longer compatible) implementation of __morestack for | |||
1822 | // ARM can be found at [1]. | |||
1823 | // | |||
1824 | // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S | |||
1825 | void ARMFrameLowering::adjustForSegmentedStacks( | |||
1826 | MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { | |||
1827 | unsigned Opcode; | |||
1828 | unsigned CFIIndex; | |||
1829 | const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>(); | |||
1830 | bool Thumb = ST->isThumb(); | |||
1831 | ||||
1832 | // Sadly, this currently doesn't support varargs, platforms other than | |||
1833 | // android/linux. Note that thumb1/thumb2 are support for android/linux. | |||
1834 | if (MF.getFunction()->isVarArg()) | |||
1835 | report_fatal_error("Segmented stacks do not support vararg functions."); | |||
1836 | if (!ST->isTargetAndroid() && !ST->isTargetLinux()) | |||
1837 | report_fatal_error("Segmented stacks not supported on this platform."); | |||
1838 | ||||
1839 | MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
1840 | MachineModuleInfo &MMI = MF.getMMI(); | |||
1841 | MCContext &Context = MMI.getContext(); | |||
1842 | const MCRegisterInfo *MRI = Context.getRegisterInfo(); | |||
1843 | const ARMBaseInstrInfo &TII = | |||
1844 | *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); | |||
1845 | ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>(); | |||
1846 | DebugLoc DL; | |||
1847 | ||||
1848 | uint64_t StackSize = MFI->getStackSize(); | |||
1849 | ||||
1850 | // Do not generate a prologue for functions with a stack of size zero | |||
1851 | if (StackSize == 0) | |||
1852 | return; | |||
1853 | ||||
1854 | // Use R4 and R5 as scratch registers. | |||
1855 | // We save R4 and R5 before use and restore them before leaving the function. | |||
1856 | unsigned ScratchReg0 = ARM::R4; | |||
1857 | unsigned ScratchReg1 = ARM::R5; | |||
1858 | uint64_t AlignedStackSize; | |||
1859 | ||||
1860 | MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock(); | |||
1861 | MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock(); | |||
1862 | MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock(); | |||
1863 | MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock(); | |||
1864 | MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock(); | |||
1865 | ||||
1866 | // Grab everything that reaches PrologueMBB to update there liveness as well. | |||
1867 | SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion; | |||
1868 | SmallVector<MachineBasicBlock *, 2> WalkList; | |||
1869 | WalkList.push_back(&PrologueMBB); | |||
1870 | ||||
1871 | do { | |||
1872 | MachineBasicBlock *CurMBB = WalkList.pop_back_val(); | |||
1873 | for (MachineBasicBlock *PredBB : CurMBB->predecessors()) { | |||
1874 | if (BeforePrologueRegion.insert(PredBB).second) | |||
1875 | WalkList.push_back(PredBB); | |||
1876 | } | |||
1877 | } while (!WalkList.empty()); | |||
1878 | ||||
1879 | // The order in that list is important. | |||
1880 | // The blocks will all be inserted before PrologueMBB using that order. | |||
1881 | // Therefore the block that should appear first in the CFG should appear | |||
1882 | // first in the list. | |||
1883 | MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB, | |||
1884 | PostStackMBB}; | |||
1885 | ||||
1886 | for (MachineBasicBlock *B : AddedBlocks) | |||
1887 | BeforePrologueRegion.insert(B); | |||
1888 | ||||
1889 | for (const auto &LI : PrologueMBB.liveins()) { | |||
1890 | for (MachineBasicBlock *PredBB : BeforePrologueRegion) | |||
1891 | PredBB->addLiveIn(LI); | |||
1892 | } | |||
1893 | ||||
1894 | // Remove the newly added blocks from the list, since we know | |||
1895 | // we do not have to do the following updates for them. | |||
1896 | for (MachineBasicBlock *B : AddedBlocks) { | |||
1897 | BeforePrologueRegion.erase(B); | |||
1898 | MF.insert(PrologueMBB.getIterator(), B); | |||
1899 | } | |||
1900 | ||||
1901 | for (MachineBasicBlock *MBB : BeforePrologueRegion) { | |||
1902 | // Make sure the LiveIns are still sorted and unique. | |||
1903 | MBB->sortUniqueLiveIns(); | |||
1904 | // Replace the edges to PrologueMBB by edges to the sequences | |||
1905 | // we are about to add. | |||
1906 | MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]); | |||
1907 | } | |||
1908 | ||||
1909 | // The required stack size that is aligned to ARM constant criterion. | |||
1910 | AlignedStackSize = alignToARMConstant(StackSize); | |||
1911 | ||||
1912 | // When the frame size is less than 256 we just compare the stack | |||
1913 | // boundary directly to the value of the stack pointer, per gcc. | |||
1914 | bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable; | |||
1915 | ||||
1916 | // We will use two of the callee save registers as scratch registers so we | |||
1917 | // need to save those registers onto the stack. | |||
1918 | // We will use SR0 to hold stack limit and SR1 to hold the stack size | |||
1919 | // requested and arguments for __morestack(). | |||
1920 | // SR0: Scratch Register #0 | |||
1921 | // SR1: Scratch Register #1 | |||
1922 | // push {SR0, SR1} | |||
1923 | if (Thumb) { | |||
1924 | AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))) | |||
1925 | .addReg(ScratchReg0).addReg(ScratchReg1); | |||
1926 | } else { | |||
1927 | AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD)) | |||
1928 | .addReg(ARM::SP, RegState::Define).addReg(ARM::SP)) | |||
1929 | .addReg(ScratchReg0).addReg(ScratchReg1); | |||
1930 | } | |||
1931 | ||||
1932 | // Emit the relevant DWARF information about the change in stack pointer as | |||
1933 | // well as where to find both r4 and r5 (the callee-save registers) | |||
1934 | CFIIndex = | |||
1935 | MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8)); | |||
1936 | BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
1937 | .addCFIIndex(CFIIndex); | |||
1938 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( | |||
1939 | nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4)); | |||
1940 | BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
1941 | .addCFIIndex(CFIIndex); | |||
1942 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( | |||
1943 | nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8)); | |||
1944 | BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
1945 | .addCFIIndex(CFIIndex); | |||
1946 | ||||
1947 | // mov SR1, sp | |||
1948 | if (Thumb) { | |||
1949 | AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1) | |||
1950 | .addReg(ARM::SP)); | |||
1951 | } else if (CompareStackPointer) { | |||
1952 | AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1) | |||
1953 | .addReg(ARM::SP)).addReg(0); | |||
1954 | } | |||
1955 | ||||
1956 | // sub SR1, sp, #StackSize | |||
1957 | if (!CompareStackPointer && Thumb) { | |||
1958 | AddDefaultPred( | |||
1959 | AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)) | |||
1960 | .addReg(ScratchReg1).addImm(AlignedStackSize)); | |||
1961 | } else if (!CompareStackPointer) { | |||
1962 | AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1) | |||
1963 | .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0); | |||
1964 | } | |||
1965 | ||||
1966 | if (Thumb && ST->isThumb1Only()) { | |||
1967 | unsigned PCLabelId = ARMFI->createPICLabelUId(); | |||
1968 | ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create( | |||
1969 | MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0); | |||
1970 | MachineConstantPool *MCP = MF.getConstantPool(); | |||
1971 | unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4); | |||
1972 | ||||
1973 | // ldr SR0, [pc, offset(STACK_LIMIT)] | |||
1974 | AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0) | |||
1975 | .addConstantPoolIndex(CPI)); | |||
1976 | ||||
1977 | // ldr SR0, [SR0] | |||
1978 | AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0) | |||
1979 | .addReg(ScratchReg0).addImm(0)); | |||
1980 | } else { | |||
1981 | // Get TLS base address from the coprocessor | |||
1982 | // mrc p15, #0, SR0, c13, c0, #3 | |||
1983 | AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0) | |||
1984 | .addImm(15) | |||
1985 | .addImm(0) | |||
1986 | .addImm(13) | |||
1987 | .addImm(0) | |||
1988 | .addImm(3)); | |||
1989 | ||||
1990 | // Use the last tls slot on android and a private field of the TCP on linux. | |||
1991 | assert(ST->isTargetAndroid() || ST->isTargetLinux())((ST->isTargetAndroid() || ST->isTargetLinux()) ? static_cast <void> (0) : __assert_fail ("ST->isTargetAndroid() || ST->isTargetLinux()" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 1991, __PRETTY_FUNCTION__)); | |||
1992 | unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1; | |||
1993 | ||||
1994 | // Get the stack limit from the right offset | |||
1995 | // ldr SR0, [sr0, #4 * TlsOffset] | |||
1996 | AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0) | |||
1997 | .addReg(ScratchReg0).addImm(4 * TlsOffset)); | |||
1998 | } | |||
1999 | ||||
2000 | // Compare stack limit with stack size requested. | |||
2001 | // cmp SR0, SR1 | |||
2002 | Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr; | |||
2003 | AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode)) | |||
2004 | .addReg(ScratchReg0) | |||
2005 | .addReg(ScratchReg1)); | |||
2006 | ||||
2007 | // This jump is taken if StackLimit < SP - stack required. | |||
2008 | Opcode = Thumb ? ARM::tBcc : ARM::Bcc; | |||
2009 | BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB) | |||
2010 | .addImm(ARMCC::LO) | |||
2011 | .addReg(ARM::CPSR); | |||
2012 | ||||
2013 | ||||
2014 | // Calling __morestack(StackSize, Size of stack arguments). | |||
2015 | // __morestack knows that the stack size requested is in SR0(r4) | |||
2016 | // and amount size of stack arguments is in SR1(r5). | |||
2017 | ||||
2018 | // Pass first argument for the __morestack by Scratch Register #0. | |||
2019 | // The amount size of stack required | |||
2020 | if (Thumb) { | |||
2021 | AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), | |||
2022 | ScratchReg0)).addImm(AlignedStackSize)); | |||
2023 | } else { | |||
2024 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) | |||
2025 | .addImm(AlignedStackSize)).addReg(0); | |||
2026 | } | |||
2027 | // Pass second argument for the __morestack by Scratch Register #1. | |||
2028 | // The amount size of stack consumed to save function arguments. | |||
2029 | if (Thumb) { | |||
2030 | AddDefaultPred( | |||
2031 | AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)) | |||
2032 | .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))); | |||
2033 | } else { | |||
2034 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) | |||
2035 | .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))) | |||
2036 | .addReg(0); | |||
2037 | } | |||
2038 | ||||
2039 | // push {lr} - Save return address of this function. | |||
2040 | if (Thumb) { | |||
2041 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))) | |||
2042 | .addReg(ARM::LR); | |||
2043 | } else { | |||
2044 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD)) | |||
2045 | .addReg(ARM::SP, RegState::Define) | |||
2046 | .addReg(ARM::SP)) | |||
2047 | .addReg(ARM::LR); | |||
2048 | } | |||
2049 | ||||
2050 | // Emit the DWARF info about the change in stack as well as where to find the | |||
2051 | // previous link register | |||
2052 | CFIIndex = | |||
2053 | MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12)); | |||
2054 | BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
2055 | .addCFIIndex(CFIIndex); | |||
2056 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( | |||
2057 | nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12)); | |||
2058 | BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
2059 | .addCFIIndex(CFIIndex); | |||
2060 | ||||
2061 | // Call __morestack(). | |||
2062 | if (Thumb) { | |||
2063 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL))) | |||
2064 | .addExternalSymbol("__morestack"); | |||
2065 | } else { | |||
2066 | BuildMI(AllocMBB, DL, TII.get(ARM::BL)) | |||
2067 | .addExternalSymbol("__morestack"); | |||
2068 | } | |||
2069 | ||||
2070 | // pop {lr} - Restore return address of this original function. | |||
2071 | if (Thumb) { | |||
2072 | if (ST->isThumb1Only()) { | |||
2073 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))) | |||
2074 | .addReg(ScratchReg0); | |||
2075 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR) | |||
2076 | .addReg(ScratchReg0)); | |||
2077 | } else { | |||
2078 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST)) | |||
2079 | .addReg(ARM::LR, RegState::Define) | |||
2080 | .addReg(ARM::SP, RegState::Define) | |||
2081 | .addReg(ARM::SP) | |||
2082 | .addImm(4)); | |||
2083 | } | |||
2084 | } else { | |||
2085 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) | |||
2086 | .addReg(ARM::SP, RegState::Define) | |||
2087 | .addReg(ARM::SP)) | |||
2088 | .addReg(ARM::LR); | |||
2089 | } | |||
2090 | ||||
2091 | // Restore SR0 and SR1 in case of __morestack() was called. | |||
2092 | // __morestack() will skip PostStackMBB block so we need to restore | |||
2093 | // scratch registers from here. | |||
2094 | // pop {SR0, SR1} | |||
2095 | if (Thumb) { | |||
2096 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))) | |||
2097 | .addReg(ScratchReg0) | |||
2098 | .addReg(ScratchReg1); | |||
2099 | } else { | |||
2100 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) | |||
2101 | .addReg(ARM::SP, RegState::Define) | |||
2102 | .addReg(ARM::SP)) | |||
2103 | .addReg(ScratchReg0) | |||
2104 | .addReg(ScratchReg1); | |||
2105 | } | |||
2106 | ||||
2107 | // Update the CFA offset now that we've popped | |||
2108 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); | |||
2109 | BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
2110 | .addCFIIndex(CFIIndex); | |||
2111 | ||||
2112 | // bx lr - Return from this function. | |||
2113 | Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET; | |||
2114 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode))); | |||
2115 | ||||
2116 | // Restore SR0 and SR1 in case of __morestack() was not called. | |||
2117 | // pop {SR0, SR1} | |||
2118 | if (Thumb) { | |||
2119 | AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))) | |||
2120 | .addReg(ScratchReg0) | |||
2121 | .addReg(ScratchReg1); | |||
2122 | } else { | |||
2123 | AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD)) | |||
2124 | .addReg(ARM::SP, RegState::Define) | |||
2125 | .addReg(ARM::SP)) | |||
2126 | .addReg(ScratchReg0) | |||
2127 | .addReg(ScratchReg1); | |||
2128 | } | |||
2129 | ||||
2130 | // Update the CFA offset now that we've popped | |||
2131 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); | |||
2132 | BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
2133 | .addCFIIndex(CFIIndex); | |||
2134 | ||||
2135 | // Tell debuggers that r4 and r5 are now the same as they were in the | |||
2136 | // previous function, that they're the "Same Value". | |||
2137 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue( | |||
2138 | nullptr, MRI->getDwarfRegNum(ScratchReg0, true))); | |||
2139 | BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
2140 | .addCFIIndex(CFIIndex); | |||
2141 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue( | |||
2142 | nullptr, MRI->getDwarfRegNum(ScratchReg1, true))); | |||
2143 | BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
2144 | .addCFIIndex(CFIIndex); | |||
2145 | ||||
2146 | // Organizing MBB lists | |||
2147 | PostStackMBB->addSuccessor(&PrologueMBB); | |||
2148 | ||||
2149 | AllocMBB->addSuccessor(PostStackMBB); | |||
2150 | ||||
2151 | GetMBB->addSuccessor(PostStackMBB); | |||
2152 | GetMBB->addSuccessor(AllocMBB); | |||
2153 | ||||
2154 | McrMBB->addSuccessor(GetMBB); | |||
2155 | ||||
2156 | PrevStackMBB->addSuccessor(McrMBB); | |||
2157 | ||||
2158 | #ifdef XDEBUG | |||
2159 | MF.verify(); | |||
2160 | #endif | |||
2161 | } |