File: lib/Target/ARM/ARMFrameLowering.cpp
Location: line 1756, column 9
Description: Called C++ object pointer is null
1 | //===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | // | |||
10 | // This file contains the ARM implementation of TargetFrameLowering class. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "ARMFrameLowering.h" | |||
15 | #include "ARMBaseInstrInfo.h" | |||
16 | #include "ARMBaseRegisterInfo.h" | |||
17 | #include "ARMConstantPoolValue.h" | |||
18 | #include "ARMMachineFunctionInfo.h" | |||
19 | #include "MCTargetDesc/ARMAddressingModes.h" | |||
20 | #include "llvm/CodeGen/MachineFrameInfo.h" | |||
21 | #include "llvm/CodeGen/MachineFunction.h" | |||
22 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |||
23 | #include "llvm/CodeGen/MachineModuleInfo.h" | |||
24 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
25 | #include "llvm/CodeGen/RegisterScavenging.h" | |||
26 | #include "llvm/IR/CallingConv.h" | |||
27 | #include "llvm/IR/Function.h" | |||
28 | #include "llvm/MC/MCContext.h" | |||
29 | #include "llvm/Support/CommandLine.h" | |||
30 | #include "llvm/Target/TargetOptions.h" | |||
31 | ||||
32 | using namespace llvm; | |||
33 | ||||
34 | static cl::opt<bool> | |||
35 | SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), | |||
36 | cl::desc("Align ARM NEON spills in prolog and epilog")); | |||
37 | ||||
38 | static MachineBasicBlock::iterator | |||
39 | skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, | |||
40 | unsigned NumAlignedDPRCS2Regs); | |||
41 | ||||
42 | ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti) | |||
43 | : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4), | |||
44 | STI(sti) {} | |||
45 | ||||
46 | bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const { | |||
47 | // iOS always has a FP for backtracking, force other targets to keep their FP | |||
48 | // when doing FastISel. The emitted code is currently superior, and in cases | |||
49 | // like test-suite's lencod FastISel isn't quite correct when FP is eliminated. | |||
50 | return TargetFrameLowering::noFramePointerElim(MF) || | |||
51 | MF.getSubtarget<ARMSubtarget>().useFastISel(); | |||
52 | } | |||
53 | ||||
54 | /// hasFP - Return true if the specified function should have a dedicated frame | |||
55 | /// pointer register. This is true if the function has variable sized allocas | |||
56 | /// or if frame pointer elimination is disabled. | |||
57 | bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { | |||
58 | const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); | |||
59 | ||||
60 | // iOS requires FP not to be clobbered for backtracing purpose. | |||
61 | if (STI.isTargetIOS()) | |||
62 | return true; | |||
63 | ||||
64 | const MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
65 | // Always eliminate non-leaf frame pointers. | |||
66 | return ((MF.getTarget().Options.DisableFramePointerElim(MF) && | |||
67 | MFI->hasCalls()) || | |||
68 | RegInfo->needsStackRealignment(MF) || | |||
69 | MFI->hasVarSizedObjects() || | |||
70 | MFI->isFrameAddressTaken()); | |||
71 | } | |||
72 | ||||
73 | /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is | |||
74 | /// not required, we reserve argument space for call sites in the function | |||
75 | /// immediately on entry to the current function. This eliminates the need for | |||
76 | /// add/sub sp brackets around call sites. Returns true if the call frame is | |||
77 | /// included as part of the stack frame. | |||
78 | bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { | |||
79 | const MachineFrameInfo *FFI = MF.getFrameInfo(); | |||
80 | unsigned CFSize = FFI->getMaxCallFrameSize(); | |||
81 | // It's not always a good idea to include the call frame as part of the | |||
82 | // stack frame. ARM (especially Thumb) has small immediate offset to | |||
83 | // address the stack frame. So a large call frame can cause poor codegen | |||
84 | // and may even makes it impossible to scavenge a register. | |||
85 | if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 | |||
86 | return false; | |||
87 | ||||
88 | return !MF.getFrameInfo()->hasVarSizedObjects(); | |||
89 | } | |||
90 | ||||
91 | /// canSimplifyCallFramePseudos - If there is a reserved call frame, the | |||
92 | /// call frame pseudos can be simplified. Unlike most targets, having a FP | |||
93 | /// is not sufficient here since we still may reference some objects via SP | |||
94 | /// even when FP is available in Thumb2 mode. | |||
95 | bool | |||
96 | ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { | |||
97 | return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); | |||
98 | } | |||
99 | ||||
100 | static bool isCSRestore(MachineInstr *MI, | |||
101 | const ARMBaseInstrInfo &TII, | |||
102 | const MCPhysReg *CSRegs) { | |||
103 | // Integer spill area is handled with "pop". | |||
104 | if (isPopOpcode(MI->getOpcode())) { | |||
105 | // The first two operands are predicates. The last two are | |||
106 | // imp-def and imp-use of SP. Check everything in between. | |||
107 | for (int i = 5, e = MI->getNumOperands(); i != e; ++i) | |||
108 | if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) | |||
109 | return false; | |||
110 | return true; | |||
111 | } | |||
112 | if ((MI->getOpcode() == ARM::LDR_POST_IMM || | |||
113 | MI->getOpcode() == ARM::LDR_POST_REG || | |||
114 | MI->getOpcode() == ARM::t2LDR_POST) && | |||
115 | isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) && | |||
116 | MI->getOperand(1).getReg() == ARM::SP) | |||
117 | return true; | |||
118 | ||||
119 | return false; | |||
120 | } | |||
121 | ||||
122 | static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, | |||
123 | MachineBasicBlock::iterator &MBBI, DebugLoc dl, | |||
124 | const ARMBaseInstrInfo &TII, unsigned DestReg, | |||
125 | unsigned SrcReg, int NumBytes, | |||
126 | unsigned MIFlags = MachineInstr::NoFlags, | |||
127 | ARMCC::CondCodes Pred = ARMCC::AL, | |||
128 | unsigned PredReg = 0) { | |||
129 | if (isARM) | |||
130 | emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, | |||
131 | Pred, PredReg, TII, MIFlags); | |||
132 | else | |||
133 | emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, | |||
134 | Pred, PredReg, TII, MIFlags); | |||
135 | } | |||
136 | ||||
137 | static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, | |||
138 | MachineBasicBlock::iterator &MBBI, DebugLoc dl, | |||
139 | const ARMBaseInstrInfo &TII, int NumBytes, | |||
140 | unsigned MIFlags = MachineInstr::NoFlags, | |||
141 | ARMCC::CondCodes Pred = ARMCC::AL, | |||
142 | unsigned PredReg = 0) { | |||
143 | emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes, | |||
144 | MIFlags, Pred, PredReg); | |||
145 | } | |||
146 | ||||
147 | static int sizeOfSPAdjustment(const MachineInstr *MI) { | |||
148 | int RegSize; | |||
149 | switch (MI->getOpcode()) { | |||
150 | case ARM::VSTMDDB_UPD: | |||
151 | RegSize = 8; | |||
152 | break; | |||
153 | case ARM::STMDB_UPD: | |||
154 | case ARM::t2STMDB_UPD: | |||
155 | RegSize = 4; | |||
156 | break; | |||
157 | case ARM::t2STR_PRE: | |||
158 | case ARM::STR_PRE_IMM: | |||
159 | return 4; | |||
160 | default: | |||
161 | llvm_unreachable("Unknown push or pop like instruction")::llvm::llvm_unreachable_internal("Unknown push or pop like instruction" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 161); | |||
162 | } | |||
163 | ||||
164 | int count = 0; | |||
165 | // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ | |||
166 | // pred) so the list starts at 4. | |||
167 | for (int i = MI->getNumOperands() - 1; i >= 4; --i) | |||
168 | count += RegSize; | |||
169 | return count; | |||
170 | } | |||
171 | ||||
172 | static bool WindowsRequiresStackProbe(const MachineFunction &MF, | |||
173 | size_t StackSizeInBytes) { | |||
174 | const MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
175 | const Function *F = MF.getFunction(); | |||
176 | unsigned StackProbeSize = (MFI->getStackProtectorIndex() > 0) ? 4080 : 4096; | |||
177 | if (F->hasFnAttribute("stack-probe-size")) | |||
178 | F->getFnAttribute("stack-probe-size") | |||
179 | .getValueAsString() | |||
180 | .getAsInteger(0, StackProbeSize); | |||
181 | return StackSizeInBytes >= StackProbeSize; | |||
182 | } | |||
183 | ||||
184 | namespace { | |||
185 | struct StackAdjustingInsts { | |||
186 | struct InstInfo { | |||
187 | MachineBasicBlock::iterator I; | |||
188 | unsigned SPAdjust; | |||
189 | bool BeforeFPSet; | |||
190 | }; | |||
191 | ||||
192 | SmallVector<InstInfo, 4> Insts; | |||
193 | ||||
194 | void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust, | |||
195 | bool BeforeFPSet = false) { | |||
196 | InstInfo Info = {I, SPAdjust, BeforeFPSet}; | |||
197 | Insts.push_back(Info); | |||
198 | } | |||
199 | ||||
200 | void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) { | |||
201 | auto Info = std::find_if(Insts.begin(), Insts.end(), | |||
202 | [&](InstInfo &Info) { return Info.I == I; }); | |||
203 | assert(Info != Insts.end() && "invalid sp adjusting instruction")((Info != Insts.end() && "invalid sp adjusting instruction" ) ? static_cast<void> (0) : __assert_fail ("Info != Insts.end() && \"invalid sp adjusting instruction\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 203, __PRETTY_FUNCTION__)); | |||
204 | Info->SPAdjust += ExtraBytes; | |||
205 | } | |||
206 | ||||
207 | void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB, | |||
208 | DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) { | |||
209 | unsigned CFAOffset = 0; | |||
210 | for (auto &Info : Insts) { | |||
211 | if (HasFP && !Info.BeforeFPSet) | |||
212 | return; | |||
213 | ||||
214 | CFAOffset -= Info.SPAdjust; | |||
215 | unsigned CFIIndex = MMI.addFrameInst( | |||
216 | MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); | |||
217 | BuildMI(MBB, std::next(Info.I), dl, | |||
218 | TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
219 | .addCFIIndex(CFIIndex) | |||
220 | .setMIFlags(MachineInstr::FrameSetup); | |||
221 | } | |||
222 | } | |||
223 | }; | |||
224 | } | |||
225 | ||||
226 | /// Emit an instruction sequence that will align the address in | |||
227 | /// register Reg by zero-ing out the lower bits. For versions of the | |||
228 | /// architecture that support Neon, this must be done in a single | |||
229 | /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a | |||
230 | /// single instruction. That function only gets called when optimizing | |||
231 | /// spilling of D registers on a core with the Neon instruction set | |||
232 | /// present. | |||
233 | static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, | |||
234 | const TargetInstrInfo &TII, | |||
235 | MachineBasicBlock &MBB, | |||
236 | MachineBasicBlock::iterator MBBI, | |||
237 | DebugLoc DL, const unsigned Reg, | |||
238 | const unsigned Alignment, | |||
239 | const bool MustBeSingleInstruction) { | |||
240 | const ARMSubtarget &AST = | |||
241 | static_cast<const ARMSubtarget &>(MF.getSubtarget()); | |||
242 | const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops(); | |||
243 | const unsigned AlignMask = Alignment - 1; | |||
244 | const unsigned NrBitsToZero = countTrailingZeros(Alignment); | |||
245 | assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported")((!AFI->isThumb1OnlyFunction() && "Thumb1 not supported" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"Thumb1 not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 245, __PRETTY_FUNCTION__)); | |||
246 | if (!AFI->isThumbFunction()) { | |||
247 | // if the BFC instruction is available, use that to zero the lower | |||
248 | // bits: | |||
249 | // bfc Reg, #0, log2(Alignment) | |||
250 | // otherwise use BIC, if the mask to zero the required number of bits | |||
251 | // can be encoded in the bic immediate field | |||
252 | // bic Reg, Reg, Alignment-1 | |||
253 | // otherwise, emit | |||
254 | // lsr Reg, Reg, log2(Alignment) | |||
255 | // lsl Reg, Reg, log2(Alignment) | |||
256 | if (CanUseBFC) { | |||
257 | AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg) | |||
258 | .addReg(Reg, RegState::Kill) | |||
259 | .addImm(~AlignMask)); | |||
260 | } else if (AlignMask <= 255) { | |||
261 | AddDefaultCC( | |||
262 | AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg) | |||
263 | .addReg(Reg, RegState::Kill) | |||
264 | .addImm(AlignMask))); | |||
265 | } else { | |||
266 | assert(!MustBeSingleInstruction &&((!MustBeSingleInstruction && "Shouldn't call emitAligningInstructions demanding a single " "instruction to be emitted for large stack alignment for a target " "without BFC.") ? static_cast<void> (0) : __assert_fail ("!MustBeSingleInstruction && \"Shouldn't call emitAligningInstructions demanding a single \" \"instruction to be emitted for large stack alignment for a target \" \"without BFC.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 269, __PRETTY_FUNCTION__)) | |||
267 | "Shouldn't call emitAligningInstructions demanding a single "((!MustBeSingleInstruction && "Shouldn't call emitAligningInstructions demanding a single " "instruction to be emitted for large stack alignment for a target " "without BFC.") ? static_cast<void> (0) : __assert_fail ("!MustBeSingleInstruction && \"Shouldn't call emitAligningInstructions demanding a single \" \"instruction to be emitted for large stack alignment for a target \" \"without BFC.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 269, __PRETTY_FUNCTION__)) | |||
268 | "instruction to be emitted for large stack alignment for a target "((!MustBeSingleInstruction && "Shouldn't call emitAligningInstructions demanding a single " "instruction to be emitted for large stack alignment for a target " "without BFC.") ? static_cast<void> (0) : __assert_fail ("!MustBeSingleInstruction && \"Shouldn't call emitAligningInstructions demanding a single \" \"instruction to be emitted for large stack alignment for a target \" \"without BFC.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 269, __PRETTY_FUNCTION__)) | |||
269 | "without BFC.")((!MustBeSingleInstruction && "Shouldn't call emitAligningInstructions demanding a single " "instruction to be emitted for large stack alignment for a target " "without BFC.") ? static_cast<void> (0) : __assert_fail ("!MustBeSingleInstruction && \"Shouldn't call emitAligningInstructions demanding a single \" \"instruction to be emitted for large stack alignment for a target \" \"without BFC.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 269, __PRETTY_FUNCTION__)); | |||
270 | AddDefaultCC(AddDefaultPred( | |||
271 | BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) | |||
272 | .addReg(Reg, RegState::Kill) | |||
273 | .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero)))); | |||
274 | AddDefaultCC(AddDefaultPred( | |||
275 | BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) | |||
276 | .addReg(Reg, RegState::Kill) | |||
277 | .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero)))); | |||
278 | } | |||
279 | } else { | |||
280 | // Since this is only reached for Thumb-2 targets, the BFC instruction | |||
281 | // should always be available. | |||
282 | assert(CanUseBFC)((CanUseBFC) ? static_cast<void> (0) : __assert_fail ("CanUseBFC" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 282, __PRETTY_FUNCTION__)); | |||
283 | AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg) | |||
284 | .addReg(Reg, RegState::Kill) | |||
285 | .addImm(~AlignMask)); | |||
286 | } | |||
287 | } | |||
288 | ||||
289 | void ARMFrameLowering::emitPrologue(MachineFunction &MF, | |||
290 | MachineBasicBlock &MBB) const { | |||
291 | assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented")((&MBB == &MF.front() && "Shrink-wrapping not yet implemented" ) ? static_cast<void> (0) : __assert_fail ("&MBB == &MF.front() && \"Shrink-wrapping not yet implemented\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 291, __PRETTY_FUNCTION__)); | |||
292 | MachineBasicBlock::iterator MBBI = MBB.begin(); | |||
293 | MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
294 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
295 | MachineModuleInfo &MMI = MF.getMMI(); | |||
296 | MCContext &Context = MMI.getContext(); | |||
297 | const TargetMachine &TM = MF.getTarget(); | |||
298 | const MCRegisterInfo *MRI = Context.getRegisterInfo(); | |||
299 | const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo(); | |||
300 | const ARMBaseInstrInfo &TII = *STI.getInstrInfo(); | |||
301 | assert(!AFI->isThumb1OnlyFunction() &&((!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitPrologue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 302, __PRETTY_FUNCTION__)) | |||
302 | "This emitPrologue does not support Thumb1!")((!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitPrologue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 302, __PRETTY_FUNCTION__)); | |||
303 | bool isARM = !AFI->isThumbFunction(); | |||
304 | unsigned Align = STI.getFrameLowering()->getStackAlignment(); | |||
305 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); | |||
306 | unsigned NumBytes = MFI->getStackSize(); | |||
307 | const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); | |||
308 | DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); | |||
309 | unsigned FramePtr = RegInfo->getFrameRegister(MF); | |||
310 | ||||
311 | // Determine the sizes of each callee-save spill areas and record which frame | |||
312 | // belongs to which callee-save spill areas. | |||
313 | unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; | |||
314 | int FramePtrSpillFI = 0; | |||
315 | int D8SpillFI = 0; | |||
316 | ||||
317 | // All calls are tail calls in GHC calling conv, and functions have no | |||
318 | // prologue/epilogue. | |||
319 | if (MF.getFunction()->getCallingConv() == CallingConv::GHC) | |||
320 | return; | |||
321 | ||||
322 | StackAdjustingInsts DefCFAOffsetCandidates; | |||
323 | bool HasFP = hasFP(MF); | |||
324 | ||||
325 | // Allocate the vararg register save area. | |||
326 | if (ArgRegsSaveSize) { | |||
327 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, | |||
328 | MachineInstr::FrameSetup); | |||
329 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true); | |||
330 | } | |||
331 | ||||
332 | if (!AFI->hasStackFrame() && | |||
333 | (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) { | |||
334 | if (NumBytes - ArgRegsSaveSize != 0) { | |||
335 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize), | |||
336 | MachineInstr::FrameSetup); | |||
337 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), | |||
338 | NumBytes - ArgRegsSaveSize, true); | |||
339 | } | |||
340 | DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); | |||
341 | return; | |||
342 | } | |||
343 | ||||
344 | // Determine spill area sizes. | |||
345 | for (unsigned i = 0, e = CSI.size(); i != e; ++i) { | |||
346 | unsigned Reg = CSI[i].getReg(); | |||
347 | int FI = CSI[i].getFrameIdx(); | |||
348 | switch (Reg) { | |||
349 | case ARM::R8: | |||
350 | case ARM::R9: | |||
351 | case ARM::R10: | |||
352 | case ARM::R11: | |||
353 | case ARM::R12: | |||
354 | if (STI.isTargetDarwin()) { | |||
355 | GPRCS2Size += 4; | |||
356 | break; | |||
357 | } | |||
358 | // fallthrough | |||
359 | case ARM::R0: | |||
360 | case ARM::R1: | |||
361 | case ARM::R2: | |||
362 | case ARM::R3: | |||
363 | case ARM::R4: | |||
364 | case ARM::R5: | |||
365 | case ARM::R6: | |||
366 | case ARM::R7: | |||
367 | case ARM::LR: | |||
368 | if (Reg == FramePtr) | |||
369 | FramePtrSpillFI = FI; | |||
370 | GPRCS1Size += 4; | |||
371 | break; | |||
372 | default: | |||
373 | // This is a DPR. Exclude the aligned DPRCS2 spills. | |||
374 | if (Reg == ARM::D8) | |||
375 | D8SpillFI = FI; | |||
376 | if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) | |||
377 | DPRCSSize += 8; | |||
378 | } | |||
379 | } | |||
380 | ||||
381 | // Move past area 1. | |||
382 | MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push; | |||
383 | if (GPRCS1Size > 0) { | |||
384 | GPRCS1Push = LastPush = MBBI++; | |||
385 | DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true); | |||
386 | } | |||
387 | ||||
388 | // Determine starting offsets of spill areas. | |||
389 | unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size; | |||
390 | unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; | |||
391 | unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U; | |||
392 | unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign; | |||
393 | unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; | |||
394 | int FramePtrOffsetInPush = 0; | |||
395 | if (HasFP) { | |||
396 | FramePtrOffsetInPush = | |||
397 | MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize; | |||
398 | AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + | |||
399 | NumBytes); | |||
400 | } | |||
401 | AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); | |||
402 | AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); | |||
403 | AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); | |||
404 | ||||
405 | // Move past area 2. | |||
406 | if (GPRCS2Size > 0) { | |||
407 | GPRCS2Push = LastPush = MBBI++; | |||
408 | DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); | |||
409 | } | |||
410 | ||||
411 | // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our | |||
412 | // .cfi_offset operations will reflect that. | |||
413 | if (DPRGapSize) { | |||
414 | assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs")((DPRGapSize == 4 && "unexpected alignment requirements for DPRs" ) ? static_cast<void> (0) : __assert_fail ("DPRGapSize == 4 && \"unexpected alignment requirements for DPRs\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 414, __PRETTY_FUNCTION__)); | |||
415 | if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize)) | |||
416 | DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize); | |||
417 | else { | |||
418 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize, | |||
419 | MachineInstr::FrameSetup); | |||
420 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize); | |||
421 | } | |||
422 | } | |||
423 | ||||
424 | // Move past area 3. | |||
425 | if (DPRCSSize > 0) { | |||
426 | // Since vpush register list cannot have gaps, there may be multiple vpush | |||
427 | // instructions in the prologue. | |||
428 | while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) { | |||
429 | DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI)); | |||
430 | LastPush = MBBI++; | |||
431 | } | |||
432 | } | |||
433 | ||||
434 | // Move past the aligned DPRCS2 area. | |||
435 | if (AFI->getNumAlignedDPRCS2Regs() > 0) { | |||
436 | MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs()); | |||
437 | // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and | |||
438 | // leaves the stack pointer pointing to the DPRCS2 area. | |||
439 | // | |||
440 | // Adjust NumBytes to represent the stack slots below the DPRCS2 area. | |||
441 | NumBytes += MFI->getObjectOffset(D8SpillFI); | |||
442 | } else | |||
443 | NumBytes = DPRCSOffset; | |||
444 | ||||
445 | if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) { | |||
446 | uint32_t NumWords = NumBytes >> 2; | |||
447 | ||||
448 | if (NumWords < 65536) | |||
449 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) | |||
450 | .addImm(NumWords) | |||
451 | .setMIFlags(MachineInstr::FrameSetup)); | |||
452 | else | |||
453 | BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4) | |||
454 | .addImm(NumWords) | |||
455 | .setMIFlags(MachineInstr::FrameSetup); | |||
456 | ||||
457 | switch (TM.getCodeModel()) { | |||
458 | case CodeModel::Small: | |||
459 | case CodeModel::Medium: | |||
460 | case CodeModel::Default: | |||
461 | case CodeModel::Kernel: | |||
462 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL)) | |||
463 | .addImm((unsigned)ARMCC::AL).addReg(0) | |||
464 | .addExternalSymbol("__chkstk") | |||
465 | .addReg(ARM::R4, RegState::Implicit) | |||
466 | .setMIFlags(MachineInstr::FrameSetup); | |||
467 | break; | |||
468 | case CodeModel::Large: | |||
469 | case CodeModel::JITDefault: | |||
470 | BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12) | |||
471 | .addExternalSymbol("__chkstk") | |||
472 | .setMIFlags(MachineInstr::FrameSetup); | |||
473 | ||||
474 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr)) | |||
475 | .addImm((unsigned)ARMCC::AL).addReg(0) | |||
476 | .addReg(ARM::R12, RegState::Kill) | |||
477 | .addReg(ARM::R4, RegState::Implicit) | |||
478 | .setMIFlags(MachineInstr::FrameSetup); | |||
479 | break; | |||
480 | } | |||
481 | ||||
482 | AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), | |||
483 | ARM::SP) | |||
484 | .addReg(ARM::SP, RegState::Define) | |||
485 | .addReg(ARM::R4, RegState::Kill) | |||
486 | .setMIFlags(MachineInstr::FrameSetup))); | |||
487 | NumBytes = 0; | |||
488 | } | |||
489 | ||||
490 | if (NumBytes) { | |||
491 | // Adjust SP after all the callee-save spills. | |||
492 | if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) | |||
493 | DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes); | |||
494 | else { | |||
495 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, | |||
496 | MachineInstr::FrameSetup); | |||
497 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes); | |||
498 | } | |||
499 | ||||
500 | if (HasFP && isARM) | |||
501 | // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 | |||
502 | // Note it's not safe to do this in Thumb2 mode because it would have | |||
503 | // taken two instructions: | |||
504 | // mov sp, r7 | |||
505 | // sub sp, #24 | |||
506 | // If an interrupt is taken between the two instructions, then sp is in | |||
507 | // an inconsistent state (pointing to the middle of callee-saved area). | |||
508 | // The interrupt handler can end up clobbering the registers. | |||
509 | AFI->setShouldRestoreSPFromFP(true); | |||
510 | } | |||
511 | ||||
512 | // Set FP to point to the stack slot that contains the previous FP. | |||
513 | // For iOS, FP is R7, which has now been stored in spill area 1. | |||
514 | // Otherwise, if this is not iOS, all the callee-saved registers go | |||
515 | // into spill area 1, including the FP in R11. In either case, it | |||
516 | // is in area one and the adjustment needs to take place just after | |||
517 | // that push. | |||
518 | if (HasFP) { | |||
519 | MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push); | |||
520 | unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push); | |||
521 | emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, | |||
522 | dl, TII, FramePtr, ARM::SP, | |||
523 | PushSize + FramePtrOffsetInPush, | |||
524 | MachineInstr::FrameSetup); | |||
525 | if (FramePtrOffsetInPush + PushSize != 0) { | |||
526 | unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( | |||
527 | nullptr, MRI->getDwarfRegNum(FramePtr, true), | |||
528 | -(ArgRegsSaveSize - FramePtrOffsetInPush))); | |||
529 | BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
530 | .addCFIIndex(CFIIndex) | |||
531 | .setMIFlags(MachineInstr::FrameSetup); | |||
532 | } else { | |||
533 | unsigned CFIIndex = | |||
534 | MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( | |||
535 | nullptr, MRI->getDwarfRegNum(FramePtr, true))); | |||
536 | BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
537 | .addCFIIndex(CFIIndex) | |||
538 | .setMIFlags(MachineInstr::FrameSetup); | |||
539 | } | |||
540 | } | |||
541 | ||||
542 | // Now that the prologue's actual instructions are finalised, we can insert | |||
543 | // the necessary DWARF cf instructions to describe the situation. Start by | |||
544 | // recording where each register ended up: | |||
545 | if (GPRCS1Size > 0) { | |||
546 | MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); | |||
547 | int CFIIndex; | |||
548 | for (const auto &Entry : CSI) { | |||
549 | unsigned Reg = Entry.getReg(); | |||
550 | int FI = Entry.getFrameIdx(); | |||
551 | switch (Reg) { | |||
552 | case ARM::R8: | |||
553 | case ARM::R9: | |||
554 | case ARM::R10: | |||
555 | case ARM::R11: | |||
556 | case ARM::R12: | |||
557 | if (STI.isTargetDarwin()) | |||
558 | break; | |||
559 | // fallthrough | |||
560 | case ARM::R0: | |||
561 | case ARM::R1: | |||
562 | case ARM::R2: | |||
563 | case ARM::R3: | |||
564 | case ARM::R4: | |||
565 | case ARM::R5: | |||
566 | case ARM::R6: | |||
567 | case ARM::R7: | |||
568 | case ARM::LR: | |||
569 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( | |||
570 | nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); | |||
571 | BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
572 | .addCFIIndex(CFIIndex) | |||
573 | .setMIFlags(MachineInstr::FrameSetup); | |||
574 | break; | |||
575 | } | |||
576 | } | |||
577 | } | |||
578 | ||||
579 | if (GPRCS2Size > 0) { | |||
580 | MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); | |||
581 | for (const auto &Entry : CSI) { | |||
582 | unsigned Reg = Entry.getReg(); | |||
583 | int FI = Entry.getFrameIdx(); | |||
584 | switch (Reg) { | |||
585 | case ARM::R8: | |||
586 | case ARM::R9: | |||
587 | case ARM::R10: | |||
588 | case ARM::R11: | |||
589 | case ARM::R12: | |||
590 | if (STI.isTargetDarwin()) { | |||
591 | unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); | |||
592 | unsigned Offset = MFI->getObjectOffset(FI); | |||
593 | unsigned CFIIndex = MMI.addFrameInst( | |||
594 | MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); | |||
595 | BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
596 | .addCFIIndex(CFIIndex) | |||
597 | .setMIFlags(MachineInstr::FrameSetup); | |||
598 | } | |||
599 | break; | |||
600 | } | |||
601 | } | |||
602 | } | |||
603 | ||||
604 | if (DPRCSSize > 0) { | |||
605 | // Since vpush register list cannot have gaps, there may be multiple vpush | |||
606 | // instructions in the prologue. | |||
607 | MachineBasicBlock::iterator Pos = std::next(LastPush); | |||
608 | for (const auto &Entry : CSI) { | |||
609 | unsigned Reg = Entry.getReg(); | |||
610 | int FI = Entry.getFrameIdx(); | |||
611 | if ((Reg >= ARM::D0 && Reg <= ARM::D31) && | |||
612 | (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) { | |||
613 | unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); | |||
614 | unsigned Offset = MFI->getObjectOffset(FI); | |||
615 | unsigned CFIIndex = MMI.addFrameInst( | |||
616 | MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); | |||
617 | BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
618 | .addCFIIndex(CFIIndex) | |||
619 | .setMIFlags(MachineInstr::FrameSetup); | |||
620 | } | |||
621 | } | |||
622 | } | |||
623 | ||||
624 | // Now we can emit descriptions of where the canonical frame address was | |||
625 | // throughout the process. If we have a frame pointer, it takes over the job | |||
626 | // half-way through, so only the first few .cfi_def_cfa_offset instructions | |||
627 | // actually get emitted. | |||
628 | DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); | |||
629 | ||||
630 | if (STI.isTargetELF() && hasFP(MF)) | |||
631 | MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - | |||
632 | AFI->getFramePtrSpillOffset()); | |||
633 | ||||
634 | AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); | |||
635 | AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); | |||
636 | AFI->setDPRCalleeSavedGapSize(DPRGapSize); | |||
637 | AFI->setDPRCalleeSavedAreaSize(DPRCSSize); | |||
638 | ||||
639 | // If we need dynamic stack realignment, do it here. Be paranoid and make | |||
640 | // sure if we also have VLAs, we have a base pointer for frame access. | |||
641 | // If aligned NEON registers were spilled, the stack has already been | |||
642 | // realigned. | |||
643 | if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) { | |||
644 | unsigned MaxAlign = MFI->getMaxAlignment(); | |||
645 | assert(!AFI->isThumb1OnlyFunction())((!AFI->isThumb1OnlyFunction()) ? static_cast<void> ( 0) : __assert_fail ("!AFI->isThumb1OnlyFunction()", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 645, __PRETTY_FUNCTION__)); | |||
646 | if (!AFI->isThumbFunction()) { | |||
647 | emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign, | |||
648 | false); | |||
649 | } else { | |||
650 | // We cannot use sp as source/dest register here, thus we're using r4 to | |||
651 | // perform the calculations. We're emitting the following sequence: | |||
652 | // mov r4, sp | |||
653 | // -- use emitAligningInstructions to produce best sequence to zero | |||
654 | // -- out lower bits in r4 | |||
655 | // mov sp, r4 | |||
656 | // FIXME: It will be better just to find spare register here. | |||
657 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) | |||
658 | .addReg(ARM::SP, RegState::Kill)); | |||
659 | emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign, | |||
660 | false); | |||
661 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) | |||
662 | .addReg(ARM::R4, RegState::Kill)); | |||
663 | } | |||
664 | ||||
665 | AFI->setShouldRestoreSPFromFP(true); | |||
666 | } | |||
667 | ||||
668 | // If we need a base pointer, set it up here. It's whatever the value | |||
669 | // of the stack pointer is at this point. Any variable size objects | |||
670 | // will be allocated after this, so we can still use the base pointer | |||
671 | // to reference locals. | |||
672 | // FIXME: Clarify FrameSetup flags here. | |||
673 | if (RegInfo->hasBasePointer(MF)) { | |||
674 | if (isARM) | |||
675 | BuildMI(MBB, MBBI, dl, | |||
676 | TII.get(ARM::MOVr), RegInfo->getBaseRegister()) | |||
677 | .addReg(ARM::SP) | |||
678 | .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); | |||
679 | else | |||
680 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), | |||
681 | RegInfo->getBaseRegister()) | |||
682 | .addReg(ARM::SP)); | |||
683 | } | |||
684 | ||||
685 | // If the frame has variable sized objects then the epilogue must restore | |||
686 | // the sp from fp. We can assume there's an FP here since hasFP already | |||
687 | // checks for hasVarSizedObjects. | |||
688 | if (MFI->hasVarSizedObjects()) | |||
689 | AFI->setShouldRestoreSPFromFP(true); | |||
690 | } | |||
691 | ||||
692 | // Resolve TCReturn pseudo-instruction | |||
693 | void ARMFrameLowering::fixTCReturn(MachineFunction &MF, | |||
694 | MachineBasicBlock &MBB) const { | |||
695 | MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); | |||
696 | assert(MBBI->isReturn() && "Can only insert epilog into returning blocks")((MBBI->isReturn() && "Can only insert epilog into returning blocks" ) ? static_cast<void> (0) : __assert_fail ("MBBI->isReturn() && \"Can only insert epilog into returning blocks\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 696, __PRETTY_FUNCTION__)); | |||
697 | unsigned RetOpcode = MBBI->getOpcode(); | |||
698 | DebugLoc dl = MBBI->getDebugLoc(); | |||
699 | const ARMBaseInstrInfo &TII = | |||
700 | *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); | |||
701 | ||||
702 | if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri)) | |||
703 | return; | |||
704 | ||||
705 | // Tail call return: adjust the stack pointer and jump to callee. | |||
706 | MBBI = MBB.getLastNonDebugInstr(); | |||
707 | MachineOperand &JumpTarget = MBBI->getOperand(0); | |||
708 | ||||
709 | // Jump to label or value in register. | |||
710 | if (RetOpcode == ARM::TCRETURNdi) { | |||
711 | unsigned TCOpcode = STI.isThumb() ? | |||
712 | (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) : | |||
713 | ARM::TAILJMPd; | |||
714 | MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); | |||
715 | if (JumpTarget.isGlobal()) | |||
716 | MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), | |||
717 | JumpTarget.getTargetFlags()); | |||
718 | else { | |||
719 | assert(JumpTarget.isSymbol())((JumpTarget.isSymbol()) ? static_cast<void> (0) : __assert_fail ("JumpTarget.isSymbol()", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 719, __PRETTY_FUNCTION__)); | |||
720 | MIB.addExternalSymbol(JumpTarget.getSymbolName(), | |||
721 | JumpTarget.getTargetFlags()); | |||
722 | } | |||
723 | ||||
724 | // Add the default predicate in Thumb mode. | |||
725 | if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0); | |||
726 | } else if (RetOpcode == ARM::TCRETURNri) { | |||
727 | BuildMI(MBB, MBBI, dl, | |||
728 | TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). | |||
729 | addReg(JumpTarget.getReg(), RegState::Kill); | |||
730 | } | |||
731 | ||||
732 | MachineInstr *NewMI = std::prev(MBBI); | |||
733 | for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) | |||
734 | NewMI->addOperand(MBBI->getOperand(i)); | |||
735 | ||||
736 | // Delete the pseudo instruction TCRETURN. | |||
737 | MBB.erase(MBBI); | |||
738 | MBBI = NewMI; | |||
739 | } | |||
740 | ||||
/// Emit the function epilogue into MBB: undo the stack adjustments made by
/// the prologue, restore SP (directly, or from FP when the frame requires
/// it), step past the callee-saved restore instructions, and resolve any
/// tail-call return pseudo. Thumb1 functions are handled by a subclass, not
/// here (see the assert below).
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks")((MBBI->isReturn() && "Can only insert epilog into returning blocks" ) ? static_cast<void> (0) : __assert_fail ("MBBI->isReturn() && \"Can only insert epilog into returning blocks\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 744, __PRETTY_FUNCTION__));
  DebugLoc dl = MBBI->getDebugLoc();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&((!AFI->isThumb1OnlyFunction() && "This emitEpilogue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitEpilogue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 752, __PRETTY_FUNCTION__))
         "This emitEpilogue does not support Thumb1!")((!AFI->isThumb1OnlyFunction() && "This emitEpilogue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitEpilogue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 752, __PRETTY_FUNCTION__));
  bool isARM = !AFI->isThumbFunction();

  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  int NumBytes = (int)MFI->getStackSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC) {
    fixTCReturn(MF, MBB);
    return;
  }

  // No callee-saved spills: the only thing to undo is the local-area
  // allocation (the argument-register save area is popped separately below).
  if (!AFI->hasStackFrame()) {
    if (NumBytes - ArgRegsSaveSize != 0)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
    if (MBBI != MBB.begin()) {
      do {
        --MBBI;
      } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
      // The loop may stop on MBB.begin() on a non-restore instruction;
      // step forward so MBBI points at the first restore (or the return).
      if (!isCSRestore(MBBI, TII, CSRegs))
        ++MBBI;
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -= (ArgRegsSaveSize +
                 AFI->getGPRCalleeSavedArea1Size() +
                 AFI->getGPRCalleeSavedArea2Size() +
                 AFI->getDPRCalleeSavedGapSize() +
                 AFI->getDPRCalleeSavedAreaSize());

    // Reset SP based on frame pointer only if the stack frame extends beyond
    // frame pointer stack slot or target is ELF and the function has FP.
    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      if (NumBytes) {
        if (isARM)
          emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
                                  ARMCC::AL, 0, TII);
        else {
          // It's not possible to restore SP from FP in a single instruction.
          // For iOS, this looks like:
          // mov sp, r7
          // sub sp, #24
          // This is bad, if an interrupt is taken after the mov, sp is in an
          // inconsistent state.
          // Use the first callee-saved register as a scratch register.
          assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&((MF.getRegInfo().isPhysRegUsed(ARM::R4) && "No scratch register to restore SP from FP!" ) ? static_cast<void> (0) : __assert_fail ("MF.getRegInfo().isPhysRegUsed(ARM::R4) && \"No scratch register to restore SP from FP!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 804, __PRETTY_FUNCTION__))
                 "No scratch register to restore SP from FP!")((MF.getRegInfo().isPhysRegUsed(ARM::R4) && "No scratch register to restore SP from FP!" ) ? static_cast<void> (0) : __assert_fail ("MF.getRegInfo().isPhysRegUsed(ARM::R4) && \"No scratch register to restore SP from FP!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 804, __PRETTY_FUNCTION__));
          // Compute the target SP into r4, then move it into SP atomically.
          emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                 ARMCC::AL, 0, TII);
          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
                                 ARM::SP)
                         .addReg(ARM::R4));
        }
      } else {
        // Thumb2 or ARM.
        if (isARM)
          BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
            .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
        else
          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
                                 ARM::SP)
                         .addReg(FramePtr));
      }
    } else if (NumBytes &&
               !tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes))
        emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);

    // Increment past our save areas.
    if (AFI->getDPRCalleeSavedAreaSize()) {
      MBBI++;
      // Since vpop register list cannot have gaps, there may be multiple vpop
      // instructions in the epilogue.
      while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
        MBBI++;
    }
    if (AFI->getDPRCalleeSavedGapSize()) {
      assert(AFI->getDPRCalleeSavedGapSize() == 4 &&((AFI->getDPRCalleeSavedGapSize() == 4 && "unexpected DPR alignment gap" ) ? static_cast<void> (0) : __assert_fail ("AFI->getDPRCalleeSavedGapSize() == 4 && \"unexpected DPR alignment gap\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 835, __PRETTY_FUNCTION__))
             "unexpected DPR alignment gap")((AFI->getDPRCalleeSavedGapSize() == 4 && "unexpected DPR alignment gap" ) ? static_cast<void> (0) : __assert_fail ("AFI->getDPRCalleeSavedGapSize() == 4 && \"unexpected DPR alignment gap\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 835, __PRETTY_FUNCTION__));
      // Pop the alignment padding word inserted between the DPR and GPR
      // save areas.
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
    }

    if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
  }

  // Rewrite a TCRETURN pseudo (if present) into a real tail jump.
  fixTCReturn(MF, MBB);

  // Finally deallocate the byval argument-register save area, if any.
  if (ArgRegsSaveSize)
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
}
848 | ||||
849 | /// getFrameIndexReference - Provide a base+offset reference to an FI slot for | |||
850 | /// debug info. It's the same as what we use for resolving the code-gen | |||
851 | /// references for now. FIXME: This can go wrong when references are | |||
852 | /// SP-relative and simple call frames aren't used. | |||
853 | int | |||
854 | ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, | |||
855 | unsigned &FrameReg) const { | |||
856 | return ResolveFrameIndexReference(MF, FI, FrameReg, 0); | |||
857 | } | |||
858 | ||||
/// ResolveFrameIndexReference - Compute how frame index FI should be
/// addressed: on return, FrameReg names the base register to use (SP, the
/// frame pointer, or the base pointer) and the returned value is the byte
/// offset from that register. SPAdj is an extra adjustment folded into
/// SP-relative offsets (presumably the current delta of SP from its
/// entry value — TODO confirm against callers).
int
ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
                                             int FI, unsigned &FrameReg,
                                             int SPAdj) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // Offset: the slot's distance from the incoming SP. FPOffset: the same
  // slot measured from the frame-pointer spill location.
  int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
  bool isFixed = MFI->isFixedObjectIndex(FI);

  // Default answer: SP-relative, including the caller-supplied adjustment.
  FrameReg = ARM::SP;
  Offset += SPAdj;

  // SP can move around if there are allocas. We may also lose track of SP
  // when emergency spilling inside a non-reserved call frame setup.
  bool hasMovingSP = !hasReservedCallFrame(MF);

  // When dynamically realigning the stack, use the frame pointer for
  // parameters, and the stack/base pointer for locals.
  if (RegInfo->needsStackRealignment(MF)) {
    assert (hasFP(MF) && "dynamic stack realignment without a FP!")((hasFP(MF) && "dynamic stack realignment without a FP!" ) ? static_cast<void> (0) : __assert_fail ("hasFP(MF) && \"dynamic stack realignment without a FP!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 881, __PRETTY_FUNCTION__));
    if (isFixed) {
      FrameReg = RegInfo->getFrameRegister(MF);
      Offset = FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) &&((RegInfo->hasBasePointer(MF) && "VLAs and dynamic stack alignment, but missing base pointer!" ) ? static_cast<void> (0) : __assert_fail ("RegInfo->hasBasePointer(MF) && \"VLAs and dynamic stack alignment, but missing base pointer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 887, __PRETTY_FUNCTION__))
             "VLAs and dynamic stack alignment, but missing base pointer!")((RegInfo->hasBasePointer(MF) && "VLAs and dynamic stack alignment, but missing base pointer!" ) ? static_cast<void> (0) : __assert_fail ("RegInfo->hasBasePointer(MF) && \"VLAs and dynamic stack alignment, but missing base pointer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 887, __PRETTY_FUNCTION__));
      FrameReg = RegInfo->getBaseRegister();
    }
    return Offset;
  }

  // If there is a frame pointer, use it when we can.
  if (hasFP(MF) && AFI->hasStackFrame()) {
    // Use frame pointer to reference fixed objects. Use it for locals if
    // there are VLAs (and thus the SP isn't reliable as a base).
    if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) && "missing base pointer!")((RegInfo->hasBasePointer(MF) && "missing base pointer!" ) ? static_cast<void> (0) : __assert_fail ("RegInfo->hasBasePointer(MF) && \"missing base pointer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 901, __PRETTY_FUNCTION__));
      if (AFI->isThumb2Function()) {
        // Try to use the frame pointer if we can, else use the base pointer
        // since it's available. This is handy for the emergency spill slot, in
        // particular.
        if (FPOffset >= -255 && FPOffset < 0) {
          FrameReg = RegInfo->getFrameRegister(MF);
          return FPOffset;
        }
      }
    } else if (AFI->isThumb2Function()) {
      // Use  add <rd>, sp, #<imm8>
      //      ldr <rd>, [sp, #<imm8>]
      // if at all possible to save space.
      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
        return Offset;
      // In Thumb2 mode, the negative offset is very limited. Try to avoid
      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
      if (FPOffset >= -255 && FPOffset < 0) {
        FrameReg = RegInfo->getFrameRegister(MF);
        return FPOffset;
      }
    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
      // Otherwise, use SP or FP, whichever is closer to the stack slot.
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    }
  }
  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  return Offset;
}
934 | ||||
935 | int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF, | |||
936 | int FI) const { | |||
937 | unsigned FrameReg; | |||
938 | return getFrameIndexReference(MF, FI, FrameReg); | |||
939 | } | |||
940 | ||||
/// emitPushInst - Spill the callee-saved registers selected by Func before
/// MI, walking CSI backwards and grouping registers into store-multiple
/// instructions.
///
/// \param StmOpc  opcode for the multi-register SP-updating store.
/// \param StrOpc  opcode for a single-register store; 0 if unavailable,
///                forcing the multi-register form even for one register.
/// \param NoGap   if true, each emitted instruction may only contain
///                consecutive registers (required for vpush).
/// \param Func    predicate choosing which CSRs this invocation handles.
/// \param NumAlignedDPRCS2Regs  d8..d8+N-1 are handled by the aligned
///                DPRCS2 path and skipped here.
/// \param MIFlags flags (e.g. FrameSetup) attached to emitted instructions.
void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    const std::vector<CalleeSavedInfo> &CSI,
                                    unsigned StmOpc, unsigned StrOpc,
                                    bool NoGap,
                                    bool(*Func)(unsigned, bool),
                                    unsigned NumAlignedDPRCS2Regs,
                                    unsigned MIFlags) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  DebugLoc DL;
  if (MI != MBB.end()) DL = MI->getDebugLoc();

  // Walk CSI from highest register down, collecting one run of registers
  // per outer iteration; each run becomes a single push/store instruction.
  SmallVector<std::pair<unsigned,bool>, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    for (; i != 0; --i) {
      unsigned Reg = CSI[i-1].getReg();
      if (!(Func)(Reg, STI.isTargetDarwin())) continue;

      // D-registers in the aligned area DPRCS2 are NOT spilled here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;

      // Add the callee-saved register as live-in unless it's LR and
      // @llvm.returnaddress is called. If LR is returned for
      // @llvm.returnaddress then it's already added to the function and
      // entry block live-in sets.
      bool isKill = true;
      if (Reg == ARM::LR) {
        if (MF.getFrameInfo()->isReturnAddressTaken() &&
            MF.getRegInfo().isLiveIn(Reg))
          isKill = false;
      }

      if (isKill)
        MBB.addLiveIn(Reg);

      // If NoGap is true, push consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;
      LastReg = Reg;
      Regs.push_back(std::make_pair(Reg, isKill));
    }

    if (Regs.empty())
      continue;
    if (Regs.size() > 1 || StrOpc== 0) {
      // Multi-register form: SP-writeback store of the whole run.
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
                       .addReg(ARM::SP).setMIFlags(MIFlags));
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
    } else if (Regs.size() == 1) {
      // Single register: pre-decrement store of 4 bytes.
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
                                        ARM::SP)
        .addReg(Regs[0].first, getKillRegState(Regs[0].second))
        .addReg(ARM::SP).setMIFlags(MIFlags)
        .addImm(-4);
      AddDefaultPred(MIB);
    }
    Regs.clear();

    // Put any subsequent vpush instructions before this one: they will refer to
    // higher register numbers so need to be pushed first in order to preserve
    // monotonicity.
    --MI;
  }
}
1014 | ||||
/// emitPopInst - Reload the callee-saved registers selected by Func before
/// MI (the return instruction), walking CSI backwards and grouping registers
/// into load-multiple instructions. When profitable, LR is reloaded directly
/// into PC and the return instruction is folded into the LDM.
///
/// \param LdmOpc  opcode for the multi-register SP-updating load.
/// \param LdrOpc  opcode for a single-register post-increment load; 0 if
///                unavailable, forcing the multi-register form.
/// \param isVarArg  vararg functions must return via LR, so the LR->PC
///                fold is disabled.
/// \param NoGap   if true, each emitted instruction may only contain
///                consecutive registers (required for vpop).
/// \param Func    predicate choosing which CSRs this invocation handles.
/// \param NumAlignedDPRCS2Regs  d8..d8+N-1 are reloaded by the aligned
///                DPRCS2 path and skipped here.
void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   const std::vector<CalleeSavedInfo> &CSI,
                                   unsigned LdmOpc, unsigned LdrOpc,
                                   bool isVarArg, bool NoGap,
                                   bool(*Func)(unsigned, bool),
                                   unsigned NumAlignedDPRCS2Regs) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI->getDebugLoc();
  unsigned RetOpcode = MI->getOpcode();
  bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
                     RetOpcode == ARM::TCRETURNri);
  bool isInterrupt =
      RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;

  // Walk CSI from highest register down, one run of registers per outer
  // iteration; each run becomes a single pop/load instruction.
  SmallVector<unsigned, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    bool DeleteRet = false;
    for (; i != 0; --i) {
      unsigned Reg = CSI[i-1].getReg();
      if (!(Func)(Reg, STI.isTargetDarwin())) continue;

      // The aligned reloads from area DPRCS2 are not inserted here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;

      // Reload LR straight into PC (returning in the same instruction),
      // unless the return must stay separate (tail call, vararg return,
      // interrupt return) or the target lacks v5T's "pop {pc}" support.
      if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
          STI.hasV5TOps()) {
        Reg = ARM::PC;
        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
        // Fold the return instruction into the LDM.
        DeleteRet = true;
      }

      // If NoGap is true, pop consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;

      LastReg = Reg;
      Regs.push_back(Reg);
    }

    if (Regs.empty())
      continue;
    if (Regs.size() > 1 || LdrOpc == 0) {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
                       .addReg(ARM::SP));
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i], getDefRegState(true));
      if (DeleteRet) {
        // The LDM now returns; carry over the return's implicit operands
        // and erase it. MI is re-anchored on the LDM below.
        MIB.copyImplicitOps(&*MI);
        MI->eraseFromParent();
      }
      MI = MIB;
    } else if (Regs.size() == 1) {
      // If we adjusted the reg to PC from LR above, switch it back here. We
      // only do that for LDM.
      if (Regs[0] == ARM::PC)
        Regs[0] = ARM::LR;
      MachineInstrBuilder MIB =
        BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
          .addReg(ARM::SP, RegState::Define)
          .addReg(ARM::SP);
      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
      // that refactoring is complete (eventually).
      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
        MIB.addReg(0);
        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
      } else
        MIB.addImm(4);
      AddDefaultPred(MIB);
    }
    Regs.clear();

    // Put any subsequent vpop instructions after this one: they will refer to
    // higher register numbers so need to be popped afterwards.
    ++MI;
  }
}
1101 | ||||
/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
///
/// The realignment sequence is exactly three instructions (sub/bic/mov);
/// skipAlignedDPRCS2Spills relies on that count, so any change here must be
/// mirrored there. r4 is used as the scratch base register throughout and is
/// killed by the last spill emitted.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    const std::vector<CalleeSavedInfo> &CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = *MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned. Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned DNum = CSI[i].getReg() - ARM::D8;
    // Skip registers outside d8..d15 (DNum is unsigned, so registers below
    // d8 wrap around to large values and are skipped too).
    if (DNum >= 8)
      continue;
    int FI = CSI[i].getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer.  MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer which is a
    // bit weird when realigning the stack.  Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1")((!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"Can't realign stack for thumb1\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 1146, __PRETTY_FUNCTION__));
  // SP no longer has a known relationship to the frame after realignment,
  // so the epilogue must restore it from FP.
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
                              .addReg(ARM::SP)
                              .addImm(8 * NumAlignedDPRCS2Regs)));

  unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
  // We must set parameter MustBeSingleInstruction to true, since
  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
  // stack alignment. Luckily, this can always be done since all ARM
  // architecture versions that support Neon also support the BFC
  // instruction.
  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
                              .addReg(ARM::R4);
  MIB = AddDefaultPred(MIB);
  if (!isThumb)
    AddDefaultCC(MIB);

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
                           ARM::R4)
                   .addReg(ARM::R4, RegState::Kill).addImm(16)
                   .addReg(NextReg)
                   .addReg(SupReg, RegState::ImplicitKill));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
                   .addReg(ARM::R4).addImm(16).addReg(NextReg)
                   .addReg(SupReg, RegState::ImplicitKill));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
                   .addReg(ARM::R4).addImm(16).addReg(SupReg));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
                   .addReg(NextReg)
                   .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}
1233 | ||||
/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
/// iterator to the following instruction.
///
/// \param MI iterator positioned at the first instruction of the sequence
///        emitted by emitAlignedDPRCS2Spills (the "sub r4, sp, ..." below).
/// \param NumAlignedDPRCS2Regs the number of aligned d-registers that were
///        spilled; it determines how many spill instructions were emitted
///        and therefore how far to advance.
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs) {
  // The sequence always begins with three stack-setup instructions:
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  ++MI; ++MI; ++MI;
  assert(MI->mayStore() && "Expecting spill instruction");

  // These switches all fall through.
  // Each ++MI below skips one of the vst1/vstr spill instructions emitted by
  // emitAlignedDPRCS2Spills; how many were emitted depends on the register
  // count (e.g. 7 regs -> one 4-reg vst1.64, one 2-reg vst1.64, one vstr.64).
  switch(NumAlignedDPRCS2Regs) {
  case 7:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
  default:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
  case 1:
  case 2:
  case 4:
    // The final spill instruction also carries the kill of the scratch r4.
    assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
    ++MI;
  }
  return MI;
}
1261 | ||||
/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. These instructions are assumed to execute while the
/// stack is still aligned, unlike the code inserted by emitPopInst.
///
/// The reload sequence mirrors the one produced by emitAlignedDPRCS2Spills:
/// r4 is used as the scratch base pointer into the d8 spill area.
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned NumAlignedDPRCS2Regs,
                                      const std::vector<CalleeSavedInfo> &CSI,
                                      const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Find the frame index assigned to d8.
  int D8SpillFI = 0;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
    if (CSI[i].getReg() == ARM::D8) {
      D8SpillFI = CSI[i].getFrameIdx();
      break;
    }

  // Materialize the address of the d8 spill slot into the scratch register r4.
  // This can be fairly complicated if the stack frame is large, so just use
  // the normal frame index elimination mechanism to do it. This code runs as
  // the initial part of the epilog where the stack and base pointers haven't
  // been changed yet.
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");

  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
                              .addFrameIndex(D8SpillFI).addImm(0)));

  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
                   .addReg(ARM::R4, RegState::Define)
                   .addReg(ARM::R4, RegState::Kill).addImm(16)
                   .addReg(SupReg, RegState::ImplicitDefine));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be reloaded.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
                   .addReg(ARM::R4).addImm(16)
                   .addReg(SupReg, RegState::ImplicitDefine));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
                   .addReg(ARM::R4).addImm(16));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
  // vldr.64 uses addrmode5 which has an offset scale of 4, so the d-register
  // distance (8 bytes apiece) translates to a *2 scaled immediate.
  if (NumAlignedDPRCS2Regs)
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
                   .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));

  // The last reload instruction inserted above should kill the scratch
  // register r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}
1343 | ||||
1344 | bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, | |||
1345 | MachineBasicBlock::iterator MI, | |||
1346 | const std::vector<CalleeSavedInfo> &CSI, | |||
1347 | const TargetRegisterInfo *TRI) const { | |||
1348 | if (CSI.empty()) | |||
1349 | return false; | |||
1350 | ||||
1351 | MachineFunction &MF = *MBB.getParent(); | |||
1352 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
1353 | ||||
1354 | unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD; | |||
1355 | unsigned PushOneOpc = AFI->isThumbFunction() ? | |||
1356 | ARM::t2STR_PRE : ARM::STR_PRE_IMM; | |||
1357 | unsigned FltOpc = ARM::VSTMDDB_UPD; | |||
1358 | unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); | |||
1359 | emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0, | |||
1360 | MachineInstr::FrameSetup); | |||
1361 | emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0, | |||
1362 | MachineInstr::FrameSetup); | |||
1363 | emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, | |||
1364 | NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); | |||
1365 | ||||
1366 | // The code above does not insert spill code for the aligned DPRCS2 registers. | |||
1367 | // The stack realignment code will be inserted between the push instructions | |||
1368 | // and these spills. | |||
1369 | if (NumAlignedDPRCS2Regs) | |||
1370 | emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); | |||
1371 | ||||
1372 | return true; | |||
1373 | } | |||
1374 | ||||
1375 | bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, | |||
1376 | MachineBasicBlock::iterator MI, | |||
1377 | const std::vector<CalleeSavedInfo> &CSI, | |||
1378 | const TargetRegisterInfo *TRI) const { | |||
1379 | if (CSI.empty()) | |||
1380 | return false; | |||
1381 | ||||
1382 | MachineFunction &MF = *MBB.getParent(); | |||
1383 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
1384 | bool isVarArg = AFI->getArgRegsSaveSize() > 0; | |||
1385 | unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); | |||
1386 | ||||
1387 | // The emitPopInst calls below do not insert reloads for the aligned DPRCS2 | |||
1388 | // registers. Do that here instead. | |||
1389 | if (NumAlignedDPRCS2Regs) | |||
1390 | emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); | |||
1391 | ||||
1392 | unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; | |||
1393 | unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM; | |||
1394 | unsigned FltOpc = ARM::VLDMDIA_UPD; | |||
1395 | emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, | |||
1396 | NumAlignedDPRCS2Regs); | |||
1397 | emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, | |||
1398 | &isARMArea2Register, 0); | |||
1399 | emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, | |||
1400 | &isARMArea1Register, 0); | |||
1401 | ||||
1402 | return true; | |||
1403 | } | |||
1404 | ||||
1405 | // FIXME: Make generic? | |||
1406 | static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, | |||
1407 | const ARMBaseInstrInfo &TII) { | |||
1408 | unsigned FnSize = 0; | |||
1409 | for (auto &MBB : MF) { | |||
1410 | for (auto &MI : MBB) | |||
1411 | FnSize += TII.GetInstSizeInBytes(&MI); | |||
1412 | } | |||
1413 | return FnSize; | |||
1414 | } | |||
1415 | ||||
1416 | /// estimateRSStackSizeLimit - Look at each instruction that references stack | |||
1417 | /// frames and return the stack size limit beyond which some of these | |||
1418 | /// instructions will require a scratch register during their expansion later. | |||
1419 | // FIXME: Move to TII? | |||
1420 | static unsigned estimateRSStackSizeLimit(MachineFunction &MF, | |||
1421 | const TargetFrameLowering *TFI) { | |||
1422 | const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
1423 | unsigned Limit = (1 << 12) - 1; | |||
1424 | for (auto &MBB : MF) { | |||
1425 | for (auto &MI : MBB) { | |||
1426 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { | |||
1427 | if (!MI.getOperand(i).isFI()) | |||
1428 | continue; | |||
1429 | ||||
1430 | // When using ADDri to get the address of a stack object, 255 is the | |||
1431 | // largest offset guaranteed to fit in the immediate offset. | |||
1432 | if (MI.getOpcode() == ARM::ADDri) { | |||
1433 | Limit = std::min(Limit, (1U << 8) - 1); | |||
1434 | break; | |||
1435 | } | |||
1436 | ||||
1437 | // Otherwise check the addressing mode. | |||
1438 | switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) { | |||
1439 | case ARMII::AddrMode3: | |||
1440 | case ARMII::AddrModeT2_i8: | |||
1441 | Limit = std::min(Limit, (1U << 8) - 1); | |||
1442 | break; | |||
1443 | case ARMII::AddrMode5: | |||
1444 | case ARMII::AddrModeT2_i8s4: | |||
1445 | Limit = std::min(Limit, ((1U << 8) - 1) * 4); | |||
1446 | break; | |||
1447 | case ARMII::AddrModeT2_i12: | |||
1448 | // i12 supports only positive offset so these will be converted to | |||
1449 | // i8 opcodes. See llvm::rewriteT2FrameIndex. | |||
1450 | if (TFI->hasFP(MF) && AFI->hasStackFrame()) | |||
1451 | Limit = std::min(Limit, (1U << 8) - 1); | |||
1452 | break; | |||
1453 | case ARMII::AddrMode4: | |||
1454 | case ARMII::AddrMode6: | |||
1455 | // Addressing modes 4 & 6 (load/store) instructions can't encode an | |||
1456 | // immediate offset for stack references. | |||
1457 | return 0; | |||
1458 | default: | |||
1459 | break; | |||
1460 | } | |||
1461 | break; // At most one FI per instruction | |||
1462 | } | |||
1463 | } | |||
1464 | } | |||
1465 | ||||
1466 | return Limit; | |||
1467 | } | |||
1468 | ||||
1469 | // In functions that realign the stack, it can be an advantage to spill the | |||
1470 | // callee-saved vector registers after realigning the stack. The vst1 and vld1 | |||
1471 | // instructions take alignment hints that can improve performance. | |||
1472 | // | |||
1473 | static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { | |||
1474 | MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0); | |||
1475 | if (!SpillAlignedNEONRegs) | |||
1476 | return; | |||
1477 | ||||
1478 | // Naked functions don't spill callee-saved registers. | |||
1479 | if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) | |||
1480 | return; | |||
1481 | ||||
1482 | // We are planning to use NEON instructions vst1 / vld1. | |||
1483 | if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON()) | |||
1484 | return; | |||
1485 | ||||
1486 | // Don't bother if the default stack alignment is sufficiently high. | |||
1487 | if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8) | |||
1488 | return; | |||
1489 | ||||
1490 | // Aligned spills require stack realignment. | |||
1491 | if (!static_cast<const ARMBaseRegisterInfo *>( | |||
1492 | MF.getSubtarget().getRegisterInfo())->canRealignStack(MF)) | |||
1493 | return; | |||
1494 | ||||
1495 | // We always spill contiguous d-registers starting from d8. Count how many | |||
1496 | // needs spilling. The register allocator will almost always use the | |||
1497 | // callee-saved registers in order, but it can happen that there are holes in | |||
1498 | // the range. Registers above the hole will be spilled to the standard DPRCS | |||
1499 | // area. | |||
1500 | MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
1501 | unsigned NumSpills = 0; | |||
1502 | for (; NumSpills < 8; ++NumSpills) | |||
1503 | if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills)) | |||
1504 | break; | |||
1505 | ||||
1506 | // Don't do this for just one d-register. It's not worth it. | |||
1507 | if (NumSpills < 2) | |||
1508 | return; | |||
1509 | ||||
1510 | // Spill the first NumSpills D-registers after realigning the stack. | |||
1511 | MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills); | |||
1512 | ||||
1513 | // A scratch register is required for the vst1 / vld1 instructions. | |||
1514 | MF.getRegInfo().setPhysRegUsed(ARM::R4); | |||
1515 | } | |||
1516 | ||||
1517 | void | |||
1518 | ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, | |||
1519 | RegScavenger *RS) const { | |||
1520 | // This tells PEI to spill the FP as if it is any other callee-save register | |||
1521 | // to take advantage the eliminateFrameIndex machinery. This also ensures it | |||
1522 | // is spilled in the order specified by getCalleeSavedRegs() to make it easier | |||
1523 | // to combine multiple loads / stores. | |||
1524 | bool CanEliminateFrame = true; | |||
1525 | bool CS1Spilled = false; | |||
1526 | bool LRSpilled = false; | |||
1527 | unsigned NumGPRSpills = 0; | |||
1528 | SmallVector<unsigned, 4> UnspilledCS1GPRs; | |||
1529 | SmallVector<unsigned, 4> UnspilledCS2GPRs; | |||
1530 | const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( | |||
1531 | MF.getSubtarget().getRegisterInfo()); | |||
1532 | const ARMBaseInstrInfo &TII = | |||
1533 | *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); | |||
1534 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
1535 | MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
1536 | MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
1537 | unsigned FramePtr = RegInfo->getFrameRegister(MF); | |||
1538 | ||||
1539 | // Spill R4 if Thumb2 function requires stack realignment - it will be used as | |||
1540 | // scratch register. Also spill R4 if Thumb2 function has varsized objects, | |||
1541 | // since it's not always possible to restore sp from fp in a single | |||
1542 | // instruction. | |||
1543 | // FIXME: It will be better just to find spare register here. | |||
1544 | if (AFI->isThumb2Function() && | |||
1545 | (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) | |||
1546 | MRI.setPhysRegUsed(ARM::R4); | |||
1547 | ||||
1548 | if (AFI->isThumb1OnlyFunction()) { | |||
| ||||
1549 | // Spill LR if Thumb1 function uses variable length argument lists. | |||
1550 | if (AFI->getArgRegsSaveSize() > 0) | |||
1551 | MRI.setPhysRegUsed(ARM::LR); | |||
1552 | ||||
1553 | // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know | |||
1554 | // for sure what the stack size will be, but for this, an estimate is good | |||
1555 | // enough. If there anything changes it, it'll be a spill, which implies | |||
1556 | // we've used all the registers and so R4 is already used, so not marking | |||
1557 | // it here will be OK. | |||
1558 | // FIXME: It will be better just to find spare register here. | |||
1559 | unsigned StackSize = MFI->estimateStackSize(MF); | |||
1560 | if (MFI->hasVarSizedObjects() || StackSize > 508) | |||
1561 | MRI.setPhysRegUsed(ARM::R4); | |||
1562 | } | |||
1563 | ||||
1564 | // See if we can spill vector registers to aligned stack. | |||
1565 | checkNumAlignedDPRCS2Regs(MF); | |||
1566 | ||||
1567 | // Spill the BasePtr if it's used. | |||
1568 | if (RegInfo->hasBasePointer(MF)) | |||
1569 | MRI.setPhysRegUsed(RegInfo->getBaseRegister()); | |||
1570 | ||||
1571 | // Don't spill FP if the frame can be eliminated. This is determined | |||
1572 | // by scanning the callee-save registers to see if any is used. | |||
1573 | const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); | |||
1574 | for (unsigned i = 0; CSRegs[i]; ++i) { | |||
1575 | unsigned Reg = CSRegs[i]; | |||
1576 | bool Spilled = false; | |||
1577 | if (MRI.isPhysRegUsed(Reg)) { | |||
1578 | Spilled = true; | |||
1579 | CanEliminateFrame = false; | |||
1580 | } | |||
1581 | ||||
1582 | if (!ARM::GPRRegClass.contains(Reg)) | |||
1583 | continue; | |||
1584 | ||||
1585 | if (Spilled) { | |||
1586 | NumGPRSpills++; | |||
1587 | ||||
1588 | if (!STI.isTargetDarwin()) { | |||
1589 | if (Reg == ARM::LR) | |||
1590 | LRSpilled = true; | |||
1591 | CS1Spilled = true; | |||
1592 | continue; | |||
1593 | } | |||
1594 | ||||
1595 | // Keep track if LR and any of R4, R5, R6, and R7 is spilled. | |||
1596 | switch (Reg) { | |||
1597 | case ARM::LR: | |||
1598 | LRSpilled = true; | |||
1599 | // Fallthrough | |||
1600 | case ARM::R0: case ARM::R1: | |||
1601 | case ARM::R2: case ARM::R3: | |||
1602 | case ARM::R4: case ARM::R5: | |||
1603 | case ARM::R6: case ARM::R7: | |||
1604 | CS1Spilled = true; | |||
1605 | break; | |||
1606 | default: | |||
1607 | break; | |||
1608 | } | |||
1609 | } else { | |||
1610 | if (!STI.isTargetDarwin()) { | |||
1611 | UnspilledCS1GPRs.push_back(Reg); | |||
1612 | continue; | |||
1613 | } | |||
1614 | ||||
1615 | switch (Reg) { | |||
1616 | case ARM::R0: case ARM::R1: | |||
1617 | case ARM::R2: case ARM::R3: | |||
1618 | case ARM::R4: case ARM::R5: | |||
1619 | case ARM::R6: case ARM::R7: | |||
1620 | case ARM::LR: | |||
1621 | UnspilledCS1GPRs.push_back(Reg); | |||
1622 | break; | |||
1623 | default: | |||
1624 | UnspilledCS2GPRs.push_back(Reg); | |||
1625 | break; | |||
1626 | } | |||
1627 | } | |||
1628 | } | |||
1629 | ||||
1630 | bool ForceLRSpill = false; | |||
1631 | if (!LRSpilled && AFI->isThumb1OnlyFunction()) { | |||
1632 | unsigned FnSize = GetFunctionSizeInBytes(MF, TII); | |||
1633 | // Force LR to be spilled if the Thumb function size is > 2048. This enables | |||
1634 | // use of BL to implement far jump. If it turns out that it's not needed | |||
1635 | // then the branch fix up path will undo it. | |||
1636 | if (FnSize >= (1 << 11)) { | |||
1637 | CanEliminateFrame = false; | |||
1638 | ForceLRSpill = true; | |||
1639 | } | |||
1640 | } | |||
1641 | ||||
1642 | // If any of the stack slot references may be out of range of an immediate | |||
1643 | // offset, make sure a register (or a spill slot) is available for the | |||
1644 | // register scavenger. Note that if we're indexing off the frame pointer, the | |||
1645 | // effective stack size is 4 bytes larger since the FP points to the stack | |||
1646 | // slot of the previous FP. Also, if we have variable sized objects in the | |||
1647 | // function, stack slot references will often be negative, and some of | |||
1648 | // our instructions are positive-offset only, so conservatively consider | |||
1649 | // that case to want a spill slot (or register) as well. Similarly, if | |||
1650 | // the function adjusts the stack pointer during execution and the | |||
1651 | // adjustments aren't already part of our stack size estimate, our offset | |||
1652 | // calculations may be off, so be conservative. | |||
1653 | // FIXME: We could add logic to be more precise about negative offsets | |||
1654 | // and which instructions will need a scratch register for them. Is it | |||
1655 | // worth the effort and added fragility? | |||
1656 | bool BigStack = | |||
1657 | (RS && | |||
1658 | (MFI->estimateStackSize(MF) + | |||
1659 | ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= | |||
1660 | estimateRSStackSizeLimit(MF, this))) | |||
1661 | || MFI->hasVarSizedObjects() | |||
1662 | || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); | |||
1663 | ||||
1664 | bool ExtraCSSpill = false; | |||
1665 | if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { | |||
1666 | AFI->setHasStackFrame(true); | |||
1667 | ||||
1668 | // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. | |||
1669 | // Spill LR as well so we can fold BX_RET to the registers restore (LDM). | |||
1670 | if (!LRSpilled && CS1Spilled) { | |||
1671 | MRI.setPhysRegUsed(ARM::LR); | |||
1672 | NumGPRSpills++; | |||
1673 | SmallVectorImpl<unsigned>::iterator LRPos; | |||
1674 | LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), | |||
1675 | (unsigned)ARM::LR); | |||
1676 | if (LRPos != UnspilledCS1GPRs.end()) | |||
1677 | UnspilledCS1GPRs.erase(LRPos); | |||
1678 | ||||
1679 | ForceLRSpill = false; | |||
1680 | ExtraCSSpill = true; | |||
1681 | } | |||
1682 | ||||
1683 | if (hasFP(MF)) { | |||
1684 | MRI.setPhysRegUsed(FramePtr); | |||
1685 | auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), | |||
1686 | FramePtr); | |||
1687 | if (FPPos != UnspilledCS1GPRs.end()) | |||
1688 | UnspilledCS1GPRs.erase(FPPos); | |||
1689 | NumGPRSpills++; | |||
1690 | } | |||
1691 | ||||
1692 | // If stack and double are 8-byte aligned and we are spilling an odd number | |||
1693 | // of GPRs, spill one extra callee save GPR so we won't have to pad between | |||
1694 | // the integer and double callee save areas. | |||
1695 | unsigned TargetAlign = getStackAlignment(); | |||
1696 | if (TargetAlign >= 8 && (NumGPRSpills & 1)) { | |||
1697 | if (CS1Spilled && !UnspilledCS1GPRs.empty()) { | |||
1698 | for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { | |||
1699 | unsigned Reg = UnspilledCS1GPRs[i]; | |||
1700 | // Don't spill high register if the function is thumb | |||
1701 | if (!AFI->isThumbFunction() || | |||
1702 | isARMLowRegister(Reg) || Reg == ARM::LR) { | |||
1703 | MRI.setPhysRegUsed(Reg); | |||
1704 | if (!MRI.isReserved(Reg)) | |||
1705 | ExtraCSSpill = true; | |||
1706 | break; | |||
1707 | } | |||
1708 | } | |||
1709 | } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { | |||
1710 | unsigned Reg = UnspilledCS2GPRs.front(); | |||
1711 | MRI.setPhysRegUsed(Reg); | |||
1712 | if (!MRI.isReserved(Reg)) | |||
1713 | ExtraCSSpill = true; | |||
1714 | } | |||
1715 | } | |||
1716 | ||||
1717 | // Estimate if we might need to scavenge a register at some point in order | |||
1718 | // to materialize a stack offset. If so, either spill one additional | |||
1719 | // callee-saved register or reserve a special spill slot to facilitate | |||
1720 | // register scavenging. Thumb1 needs a spill slot for stack pointer | |||
1721 | // adjustments also, even when the frame itself is small. | |||
1722 | if (BigStack && !ExtraCSSpill) { | |||
1723 | // If any non-reserved CS register isn't spilled, just spill one or two | |||
1724 | // extra. That should take care of it! | |||
1725 | unsigned NumExtras = TargetAlign / 4; | |||
1726 | SmallVector<unsigned, 2> Extras; | |||
1727 | while (NumExtras && !UnspilledCS1GPRs.empty()) { | |||
1728 | unsigned Reg = UnspilledCS1GPRs.back(); | |||
1729 | UnspilledCS1GPRs.pop_back(); | |||
1730 | if (!MRI.isReserved(Reg) && | |||
1731 | (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || | |||
1732 | Reg == ARM::LR)) { | |||
1733 | Extras.push_back(Reg); | |||
1734 | NumExtras--; | |||
1735 | } | |||
1736 | } | |||
1737 | // For non-Thumb1 functions, also check for hi-reg CS registers | |||
1738 | if (!AFI->isThumb1OnlyFunction()) { | |||
1739 | while (NumExtras && !UnspilledCS2GPRs.empty()) { | |||
1740 | unsigned Reg = UnspilledCS2GPRs.back(); | |||
1741 | UnspilledCS2GPRs.pop_back(); | |||
1742 | if (!MRI.isReserved(Reg)) { | |||
1743 | Extras.push_back(Reg); | |||
1744 | NumExtras--; | |||
1745 | } | |||
1746 | } | |||
1747 | } | |||
1748 | if (Extras.size() && NumExtras == 0) { | |||
1749 | for (unsigned i = 0, e = Extras.size(); i != e; ++i) { | |||
1750 | MRI.setPhysRegUsed(Extras[i]); | |||
1751 | } | |||
1752 | } else if (!AFI->isThumb1OnlyFunction()) { | |||
1753 | // note: Thumb1 functions spill to R12, not the stack. Reserve a slot | |||
1754 | // closest to SP or frame pointer. | |||
1755 | const TargetRegisterClass *RC = &ARM::GPRRegClass; | |||
1756 | RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), | |||
| ||||
1757 | RC->getAlignment(), | |||
1758 | false)); | |||
1759 | } | |||
1760 | } | |||
1761 | } | |||
1762 | ||||
1763 | if (ForceLRSpill) { | |||
1764 | MRI.setPhysRegUsed(ARM::LR); | |||
1765 | AFI->setLRIsSpilledForFarJump(true); | |||
1766 | } | |||
1767 | } | |||
1768 | ||||
1769 | ||||
/// Replace ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions with real
/// sp-adjusting instructions when the call frame is not reserved, then erase
/// the pseudo. When the call frame is reserved, the pseudo is simply removed.
void ARMFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  if (!hasReservedCallFrame(MF)) {
    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
    // ADJCALLSTACKUP   -> add, sp, sp, amount
    MachineInstr *Old = I;
    DebugLoc dl = Old->getDebugLoc();
    unsigned Amount = Old->getOperand(0).getImm();
    if (Amount != 0) {
      // We need to keep the stack aligned properly. To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      unsigned Align = getStackAlignment();
      Amount = (Amount+Align-1)/Align*Align;

      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
      assert(!AFI->isThumb1OnlyFunction() &&
             "This eliminateCallFramePseudoInstr does not support Thumb1!");
      bool isARM = !AFI->isThumbFunction();

      // Replace the pseudo instruction with a new instruction...
      // Carry the pseudo's predicate over to the real sp-adjusting
      // instruction; default to AL when no predicate operand is present.
      unsigned Opc = Old->getOpcode();
      int PIdx = Old->findFirstPredOperandIdx();
      ARMCC::CondCodes Pred = (PIdx == -1)
        ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
        unsigned PredReg = Old->getOperand(2).getReg();
        emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        // Note: PredReg is operand 3 for ADJCALLSTACKUP.
        unsigned PredReg = Old->getOperand(3).getReg();
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  }
  // The pseudo itself is always deleted, whether or not sp-updates were
  // emitted in its place.
  MBB.erase(I);
}
1815 | ||||
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
static uint32_t alignToARMConstant(uint32_t Value) {
  if (Value == 0)
    return 0;

  // Normalize: shift left two bits at a time until the top quarter-word is
  // occupied, remembering how far we moved.
  unsigned ShiftAmt = 0;
  while ((Value & 0xC0000000) == 0) {
    Value <<= 2;
    ShiftAmt += 2;
  }

  // Keep the leading byte, rounding it up if any of the discarded low bits
  // were set.
  uint32_t RoundUp = (Value & 0x00FFFFFF) ? 1 : 0;
  Value = (Value >> 24) + RoundUp;

  // If rounding carried into bit 8, drop the low bits that can no longer be
  // represented in the 8-bit window.
  if (Value & 0x100)
    Value &= 0x1FC;

  // Undo the normalization shift to restore the original magnitude.
  return (ShiftAmt > 24) ? (Value >> (ShiftAmt - 24))
                         : (Value << (24 - ShiftAmt));
}
1844 | ||||
// The stack limit in the TCB is set to this many bytes above the actual
// stack limit. Requests that fit within this slack presumably avoid the
// __morestack call; consumed by the segmented-stack prologue code below.
static const uint64_t kSplitStackAvailable = 256;
1848 | ||||
1849 | // Adjust the function prologue to enable split stacks. This currently only | |||
1850 | // supports android and linux. | |||
1851 | // | |||
1852 | // The ABI of the segmented stack prologue is a little arbitrarily chosen, but | |||
1853 | // must be well defined in order to allow for consistent implementations of the | |||
1854 | // __morestack helper function. The ABI is also not a normal ABI in that it | |||
1855 | // doesn't follow the normal calling conventions because this allows the | |||
1856 | // prologue of each function to be optimized further. | |||
1857 | // | |||
1858 | // Currently, the ABI looks like (when calling __morestack) | |||
1859 | // | |||
1860 | // * r4 holds the minimum stack size requested for this function call | |||
1861 | // * r5 holds the stack size of the arguments to the function | |||
1862 | // * the beginning of the function is 3 instructions after the call to | |||
1863 | // __morestack | |||
1864 | // | |||
1865 | // Implementations of __morestack should use r4 to allocate a new stack, r5 to | |||
1866 | // place the arguments on to the new stack, and the 3-instruction knowledge to | |||
1867 | // jump directly to the body of the function when working on the new stack. | |||
1868 | // | |||
1869 | // An old (and possibly no longer compatible) implementation of __morestack for | |||
1870 | // ARM can be found at [1]. | |||
1871 | // | |||
1872 | // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S | |||
void ARMFrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  unsigned Opcode;
  unsigned CFIIndex;
  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
  bool Thumb = ST->isThumb();

  // Sadly, this currently doesn't support varargs, platforms other than
  // android/linux. Note that thumb1/thumb2 are supported for android/linux.
  if (MF.getFunction()->isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
    report_fatal_error("Segmented stacks not supported on this platform.");

  assert(&PrologueMBB == &MF.front() && "Shrink-wrapping not yet implemented");
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  MCContext &Context = MMI.getContext();
  // NOTE(review): Context.getRegisterInfo() can return null if no register
  // info was registered with the MCContext; MRI is dereferenced
  // unconditionally below when emitting the DWARF CFI directives.  Static
  // analysis flags a possible null object pointer on this path -- confirm
  // MRI is always non-null during frame lowering.
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;

  uint64_t StackSize = MFI->getStackSize();

  // Do not generate a prologue for functions with a stack of size zero
  if (StackSize == 0)
    return;

  // Use R4 and R5 as scratch registers.
  // We save R4 and R5 before use and restore them before leaving the function.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  uint64_t AlignedStackSize;

  // New blocks inserted ahead of the original prologue.  After the
  // push_front calls below, the layout order is:
  //   PrevStackMBB, McrMBB, GetMBB, AllocMBB, PostStackMBB, PrologueMBB
  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();

  // Replicate the original prologue's live-ins into every new block so
  // register liveness stays consistent across the inserted control flow.
  for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
                                          e = PrologueMBB.livein_end();
       i != e; ++i) {
    AllocMBB->addLiveIn(*i);
    GetMBB->addLiveIn(*i);
    McrMBB->addLiveIn(*i);
    PrevStackMBB->addLiveIn(*i);
    PostStackMBB->addLiveIn(*i);
  }

  MF.push_front(PostStackMBB);
  MF.push_front(AllocMBB);
  MF.push_front(GetMBB);
  MF.push_front(McrMBB);
  MF.push_front(PrevStackMBB);

  // The required stack size that is aligned to ARM constant criterion.
  AlignedStackSize = alignToARMConstant(StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;

  // We will use two of the callee save registers as scratch registers so we
  // need to save those registers onto the stack.
  // We will use SR0 to hold stack limit and SR1 to hold the stack size
  // requested and arguments for __morestack().
  // SR0: Scratch Register #0
  // SR1: Scratch Register #1
  // push {SR0, SR1}
  if (Thumb) {
    AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH)))
        .addReg(ScratchReg0).addReg(ScratchReg1);
  } else {
    AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
                   .addReg(ARM::SP, RegState::Define).addReg(ARM::SP))
        .addReg(ScratchReg0).addReg(ScratchReg1);
  }

  // Emit the relevant DWARF information about the change in stack pointer as
  // well as where to find both r4 and r5 (the callee-save registers)
  CFIIndex =
      MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
      nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
      nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // mov SR1, sp
  // (When the frame is large enough that SP itself cannot be compared, the
  // non-Thumb copy is skipped here and SR1 is computed by the sub below.)
  if (Thumb) {
    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
                      .addReg(ARM::SP));
  } else if (CompareStackPointer) {
    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
                      .addReg(ARM::SP)).addReg(0);
  }

  // sub SR1, sp, #StackSize
  if (!CompareStackPointer && Thumb) {
    AddDefaultPred(
        AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1))
            .addReg(ScratchReg1).addImm(AlignedStackSize));
  } else if (!CompareStackPointer) {
    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
                      .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0);
  }

  // Thumb1-only targets load the stack limit indirectly through a
  // constant-pool reference to the __STACK_LIMIT symbol, instead of reading
  // the TLS slot with an MRC coprocessor access (the else branch below).
  if (Thumb && ST->isThumb1Only()) {
    unsigned PCLabelId = ARMFI->createPICLabelUId();
    ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
        MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0);
    MachineConstantPool *MCP = MF.getConstantPool();
    unsigned CPI = MCP->getConstantPoolIndex(NewCPV, MF.getAlignment());

    // ldr SR0, [pc, offset(STACK_LIMIT)]
    AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
                      .addConstantPoolIndex(CPI));

    // ldr SR0, [SR0]
    AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
                      .addReg(ScratchReg0).addImm(0));
  } else {
    // Get TLS base address from the coprocessor
    // mrc p15, #0, SR0, c13, c0, #3
    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
                     .addImm(15)
                     .addImm(0)
                     .addImm(13)
                     .addImm(0)
                     .addImm(3));

    // Use the last tls slot on android and a private field of the TCB on linux.
    assert(ST->isTargetAndroid() || ST->isTargetLinux());
    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;

    // Get the stack limit from the right offset
    // ldr SR0, [sr0, #4 * TlsOffset]
    AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
                      .addReg(ScratchReg0).addImm(4 * TlsOffset));
  }

  // Compare stack limit with stack size requested.
  // cmp SR0, SR1
  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
  AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode))
                    .addReg(ScratchReg0)
                    .addReg(ScratchReg1));

  // This jump is taken if StackLimit < SP - stack required.
  // In that case the current stack suffices and AllocMBB (the __morestack
  // call) is bypassed entirely.
  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
       .addImm(ARMCC::LO)
       .addReg(ARM::CPSR);


  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack knows that the stack size requested is in SR0(r4)
  // and amount size of stack arguments is in SR1(r5).

  // Pass first argument for the __morestack by Scratch Register #0.
  // The amount size of stack required
  if (Thumb) {
    AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8),
                                        ScratchReg0)).addImm(AlignedStackSize));
  } else {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
                      .addImm(AlignedStackSize)).addReg(0);
  }
  // Pass second argument for the __morestack by Scratch Register #1.
  // The amount size of stack consumed to save function arguments.
  if (Thumb) {
    AddDefaultPred(
        AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1))
            .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())));
  } else {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
                   .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())))
                   .addReg(0);
  }

  // push {lr} - Save return address of this function.
  if (Thumb) {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH)))
        .addReg(ARM::LR);
  } else {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
                   .addReg(ARM::SP, RegState::Define)
                   .addReg(ARM::SP))
        .addReg(ARM::LR);
  }

  // Emit the DWARF info about the change in stack as well as where to find the
  // previous link register
  CFIIndex =
      MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
      nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // Call __morestack().
  if (Thumb) {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL)))
        .addExternalSymbol("__morestack");
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::BL))
        .addExternalSymbol("__morestack");
  }

  // pop {lr} - Restore return address of this original function.
  if (Thumb) {
    if (ST->isThumb1Only()) {
      // Thumb1 pops into SR0 first and then moves into lr, rather than
      // loading lr directly.
      AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
          .addReg(ScratchReg0);
      AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
                        .addReg(ScratchReg0));
    } else {
      AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
                        .addReg(ARM::LR, RegState::Define)
                        .addReg(ARM::SP, RegState::Define)
                        .addReg(ARM::SP)
                        .addImm(4));
    }
  } else {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
                   .addReg(ARM::SP, RegState::Define)
                   .addReg(ARM::SP))
        .addReg(ARM::LR);
  }

  // Restore SR0 and SR1 in case of __morestack() was called.
  // __morestack() will skip PostStackMBB block so we need to restore
  // scratch registers from here.
  // pop {SR0, SR1}
  if (Thumb) {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
                   .addReg(ARM::SP, RegState::Define)
                   .addReg(ARM::SP))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // bx lr - Return from this function.
  Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET;
  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode)));

  // Restore SR0 and SR1 in case of __morestack() was not called.
  // pop {SR0, SR1}
  if (Thumb) {
    AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP)))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
                   .addReg(ARM::SP, RegState::Define)
                   .addReg(ARM::SP))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // Tell debuggers that r4 and r5 are now the same as they were in the
  // previous function, that they're the "Same Value".
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
      nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
      nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // Organizing MBB lists
  PostStackMBB->addSuccessor(&PrologueMBB);

  AllocMBB->addSuccessor(PostStackMBB);

  GetMBB->addSuccessor(PostStackMBB);
  GetMBB->addSuccessor(AllocMBB);

  McrMBB->addSuccessor(GetMBB);

  PrevStackMBB->addSuccessor(McrMBB);

#ifdef XDEBUG
  MF.verify();
#endif
}