LLVM 17.0.0git
ARMFrameLowering.cpp
Go to the documentation of this file.
1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
13// This file contains the ARM implementation of TargetFrameLowering class.
14//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function so that the particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
25// | | Higher address
26// |-----------------------------------|
27// | |
28// | arguments passed on the stack |
29// | |
30// |-----------------------------------| <- sp
31// | | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) doesn't get created until in the
36// main function body, after the prologue is run. However, it's depicted here
37// for completeness.
38//
39// | | Higher address
40// |-----------------------------------|
41// | |
42// | arguments passed on the stack |
43// | |
44// |-----------------------------------| <- (sp at function entry)
45// | |
46// | varargs from registers |
47// | |
48// |-----------------------------------|
49// | |
50// | prev_lr |
51// | prev_fp |
52// | (a.k.a. "frame record") |
53// | |
54// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55// | |
56// | callee-saved gpr registers |
57// | |
58// |-----------------------------------|
59// | |
60// | callee-saved fp/simd regs |
61// | |
62// |-----------------------------------|
63// |.empty.space.to.make.part.below....|
64// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65// |.the.standard.8-byte.alignment.....| compile time; if present)
66// |-----------------------------------|
67// | |
68// | local variables of fixed size |
69// | including spill slots |
70// |-----------------------------------| <- base pointer (not defined by ABI,
71// |.variable-sized.local.variables....| LLVM chooses r6)
72// |.(VLAs)............................| (size of this area is unknown at
73// |...................................| compile time)
74// |-----------------------------------| <- sp
75// | | Lower address
76//
77//
78// To access the data in a frame, at-compile time, a constant offset must be
79// computable from one of the pointers (fp, bp, sp) to access it. The size
80// of the areas with a dotted background cannot be computed at compile-time
81// if they are present, making it required to have all three of fp, bp and
82// sp to be set up to be able to access all contents in the frame areas,
83// assuming all of the frame areas are non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
115#include "ARMSubtarget.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCContext.h"
144#include "llvm/MC/MCDwarf.h"
145#include "llvm/MC/MCInstrDesc.h"
147#include "llvm/Support/CodeGen.h"
150#include "llvm/Support/Debug.h"
156#include <algorithm>
157#include <cassert>
158#include <cstddef>
159#include <cstdint>
160#include <iterator>
161#include <utility>
162#include <vector>
163
164#define DEBUG_TYPE "arm-frame-lowering"
165
166using namespace llvm;
167
// Hidden command-line override: when true (the default), NEON D-register
// spills in the prologue/epilogue are 8-byte aligned.
static cl::opt<bool>
SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
                     cl::desc("Align ARM NEON spills in prolog and epilog"));
171
// NOTE(review): this is the tail of a forward declaration whose opening
// line(s) were lost in extraction; only the final parameter is visible.
                                    unsigned NumAlignedDPRCS2Regs);
// Constructor initializer list (the signature line was lost in extraction):
// the ARM stack grows down, stack alignment comes from the subtarget, the
// local-area offset is 0, and the transient stack alignment is 4 bytes.
    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
      STI(sti) {}
179
// NOTE(review): the signature line of this function was lost in extraction;
// only the body is visible below.
  // iOS always has a FP for backtracking, force other targets to keep their FP
  // when doing FastISel. The emitted code is currently superior, and in cases
  // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
  return MF.getSubtarget<ARMSubtarget>().useFastISel();
}
186
/// Returns true if the target can safely skip saving callee-saved registers
/// for noreturn nounwind functions.
// NOTE(review): the function signature line was lost in extraction.
  // Sanity-check the caller's promise: this is only valid for functions that
  // are noreturn + nounwind and do not require an unwind table.
  assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
         MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
         !MF.getFunction().hasFnAttribute(Attribute::UWTable));

  // Frame pointer and link register are not treated as normal CSR, thus we
  // can always skip CSR saves for nonreturning functions.
  return true;
}
198
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
// NOTE(review): the function signature line was lost in extraction.
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // ABI-required frame pointer.
  // NOTE(review): the condition line guarding this early return was lost in
  // extraction.
    return true;

  // Frame pointer required for use within this function.
  return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken());
}
214
/// isFPReserved - Return true if the frame pointer register should be
/// considered a reserved register on the scope of the specified function.
// NOTE(review): the function signature line was lost in extraction.
  // Reserved whenever a frame pointer is needed, or whenever the subtarget
  // requests an AAPCS frame chain.
  return hasFP(MF) || MF.getSubtarget<ARMSubtarget>().createAAPCSFrameChain();
}
220
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
// NOTE(review): the function signature line was lost in extraction.
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned CFSize = MFI.getMaxCallFrameSize();
  // It's not always a good idea to include the call frame as part of the
  // stack frame. ARM (especially Thumb) has small immediate offset to
  // address the stack frame. So a large call frame can cause poor codegen
  // and may even makes it impossible to scavenge a register.
  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
    return false;

  // VLAs make the call-frame size unknowable at compile time, so it cannot
  // be reserved up front.
  return !MFI.hasVarSizedObjects();
}
238
/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Unlike most targets, having a FP
/// is not sufficient here since we still may reference some objects via SP
/// even when FP is available in Thumb2 mode.
bool
// NOTE(review): the rest of this definition (signature continuation and body)
// was lost in extraction.
}
// Returns how much of the incoming argument stack area we should clean up in an
// epilogue. For the C calling convention this will be 0, for guaranteed tail
// call conventions it can be positive (a normal return or a tail call to a
// function that uses less stack space for arguments) or negative (for a tail
// call to a function that needs more stack space than us for arguments).
// NOTE(review): the function signature and the lines initializing AFI were
// lost in extraction.
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    unsigned RetOpcode = MBBI->getOpcode();
    // Only the tail-call return pseudos carry an explicit stack-adjustment
    // operand (consumed below).
    IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
                       RetOpcode == ARM::TCRETURNri;
  }

  int ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  return ArgumentPopSize;
}
282
283static bool needsWinCFI(const MachineFunction &MF) {
284 const Function &F = MF.getFunction();
285 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
286 F.needsUnwindTableEntry();
287}
288
// Given a load or a store instruction, generate an appropriate unwinding SEH
// code on Windows.
// NOTE(review): the opening signature line, the declarations of MBB and MIB,
// and several trailing .setMIFlags(Flags)/.add(...) lines of the BuildMI
// chains below were lost in extraction.
                                             const TargetInstrInfo &TII,
                                             unsigned Flags) {
  unsigned Opc = MBBI->getOpcode();
  MachineFunction &MF = *MBB->getParent();
  DebugLoc DL = MBBI->getDebugLoc();
  const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
  const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  switch (Opc) {
  default:
    report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
    break;
  case ARM::t2ADDri:   // add.w r11, sp, #xx
  case ARM::t2ADDri12: // add.w r11, sp, #xx
  case ARM::t2MOVTi16: // movt r4, #xx
  case ARM::tBL:       // bl __chkstk
    // These are harmless if used for just setting up a frame pointer,
    // but that frame pointer can't be relied upon for unwinding, unless
    // set up with SEH_SaveSP.
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/1)
    break;

  case ARM::t2MOVi16: { // mov(w) r4, #xx
    bool Wide = MBBI->getOperand(1).getImm() >= 256;
    if (!Wide) {
      // Small immediates fit the 16-bit tMOVi8 encoding; rebuild the
      // instruction so the SEH nop below can be marked narrow to match.
      MachineInstrBuilder NewInstr =
          BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
      NewInstr.add(MBBI->getOperand(0));
      NewInstr.add(t1CondCodeOp(/*isDead=*/true));
      for (unsigned i = 1, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
        NewInstr.add(MBBI->getOperand(i));
      MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
      MBB->erase(MBBI);
      MBBI = NewMBBI;
    }
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
    break;
  }

  case ARM::tBLXr: // blx r12 (__chkstk)
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/0)
    break;

  case ARM::t2MOVi32imm: // movw+movt
    // This pseudo instruction expands into two mov instructions. If the
    // second operand is a symbol reference, this will stay as two wide
    // instructions, movw+movt. If they're immediates, the first one can
    // end up as a narrow mov though.
    // As two SEH instructions are appended here, they won't get interleaved
    // between the two final movw/movt instructions, but it doesn't make any
    // practical difference.
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/1)
    MBB->insertAfter(MBBI, MIB);
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/1)
    break;

  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMDB_UPD: {
    // Build the SEH register mask from the explicit register-list operands
    // (the first four operands are sp, sp and the predicate).
    unsigned Mask = 0;
    bool Wide = false;
    for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
      const MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isImplicit())
        continue;
      unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
      if (Reg == 15)
        Reg = 14; // A pc pop is recorded in the lr slot of the SEH mask.
      if (Reg >= 8 && Reg <= 13)
        Wide = true;
      else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
        Wide = true;
      Mask |= 1 << Reg;
    }
    if (!Wide) {
      // Every register fits a narrow push/pop; rewrite the instruction so
      // its size matches the narrow SEH annotation.
      unsigned NewOpc;
      switch (Opc) {
      case ARM::t2LDMIA_RET:
        NewOpc = ARM::tPOP_RET;
        break;
      case ARM::t2LDMIA_UPD:
        NewOpc = ARM::tPOP;
        break;
      case ARM::t2STMDB_UPD:
        NewOpc = ARM::tPUSH;
        break;
      default:
        // NOTE(review): the statement on this branch (presumably an
        // llvm_unreachable) was lost in extraction.
      }
      MachineInstrBuilder NewInstr =
          BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
      for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
        NewInstr.add(MBBI->getOperand(i));
      MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
      MBB->erase(MBBI);
      MBBI = NewMBBI;
    }
    unsigned SEHOpc =
        (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
    MIB = BuildMI(MF, DL, TII.get(SEHOpc))
              .addImm(Mask)
              .addImm(Wide ? 1 : 0)
    break;
  }
  case ARM::VSTMDDB_UPD:
  case ARM::VLDMDIA_UPD: {
    // D-register save/restore lists have no gaps, so the range is fully
    // described by the first and last register numbers.
    int First = -1, Last = 0;
    for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
      const MachineOperand &MO = MBBI->getOperand(i);
      unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
      if (First == -1)
        First = Reg;
      Last = Reg;
    }
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
              .addImm(First)
              .addImm(Last)
    break;
  }
  case ARM::tSUBspi:
  case ARM::tADDspi:
    // Narrow sp adjustment: immediate operand is in words, SEH wants bytes.
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
              .addImm(MBBI->getOperand(2).getImm() * 4)
              .addImm(/*Wide=*/0)
    break;
  case ARM::t2SUBspImm:
  case ARM::t2SUBspImm12:
  case ARM::t2ADDspImm:
  case ARM::t2ADDspImm12:
    // Wide sp adjustment: immediate is already in bytes.
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
              .addImm(MBBI->getOperand(2).getImm())
              .addImm(/*Wide=*/1)
    break;

  case ARM::tMOVr:
    if (MBBI->getOperand(1).getReg() == ARM::SP &&
        // NOTE(review): the remainder of this condition was lost in
        // extraction.
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
                .addImm(Reg)
    } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
        // NOTE(review): the remainder of this condition was lost in
        // extraction.
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
                .addImm(Reg)
    } else {
      report_fatal_error("No SEH Opcode for MOV");
    }
    break;

  case ARM::tBX_RET:
  case ARM::TCRETURNri:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
              .addImm(/*Wide=*/0)
    break;

  case ARM::TCRETURNdi:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
              .addImm(/*Wide=*/1)
    break;
  }
  // Place the SEH pseudo immediately after the instruction it annotates.
  return MBB->insertAfter(MBBI, MIB);
}
475
// NOTE(review): the signature line(s) of this small helper were lost in
// extraction; only the tail of its body is visible.
  if (MBBI == MBB.begin())
    // NOTE(review): the statement taken on the begin() branch was lost in
    // extraction.
  return std::prev(MBBI);
}
482
// Walk the half-open instruction range (Start, End] and attach an SEH
// annotation to each instruction that does not already have one.
// NOTE(review): the opening signature line(s), including the MBB/Start/End
// parameters used below, were lost in extraction.
                          const ARMBaseInstrInfo &TII, unsigned MIFlags) {
  // An invalid Start means "begin at the first instruction of the block".
  if (Start.isValid())
    Start = std::next(Start);
  else
    Start = MBB.begin();

  for (auto MI = Start; MI != End;) {
    auto Next = std::next(MI);
    // Check if this instruction already has got a SEH opcode added. In that
    // case, don't do this generic mapping.
    if (Next != End && isSEHInstruction(*Next)) {
      MI = std::next(Next);
      while (MI != End && isSEHInstruction(*MI))
        ++MI;
      continue;
    }
    insertSEH(MI, TII, MIFlags);
    MI = Next;
  }
}
506
// Materialize DestReg = SrcReg + NumBytes, dispatching to the ARM- or
// Thumb2-flavored helper depending on the current instruction set.
// NOTE(review): the first signature line (the name plus the MBB/MBBI
// parameters) was lost in extraction.
    const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
    unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
    ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
  if (isARM)
    emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
                            Pred, PredReg, TII, MIFlags);
  else
    emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
                           Pred, PredReg, TII, MIFlags);
}
519
// Adjust SP by NumBytes (negative values allocate stack) by delegating to
// emitRegPlusImmediate with SP as both source and destination.
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
                         // NOTE(review): the MBBI/dl parameter line was lost
                         // in extraction.
                         const ARMBaseInstrInfo &TII, int NumBytes,
                         unsigned MIFlags = MachineInstr::NoFlags,
                         // NOTE(review): the Pred parameter line was lost in
                         // extraction.
                         unsigned PredReg = 0) {
  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
                       MIFlags, Pred, PredReg);
}
529
// Compute, in bytes, how much the given push/pop-like instruction adjusts SP.
// NOTE(review): the function signature line (declaring the MachineInstr &MI
// parameter) was lost in extraction.
  int RegSize;
  switch (MI.getOpcode()) {
  case ARM::VSTMDDB_UPD:
    // D registers: 8 bytes per list entry.
    RegSize = 8;
    break;
  case ARM::STMDB_UPD:
  case ARM::t2STMDB_UPD:
    // GPRs: 4 bytes per list entry.
    RegSize = 4;
    break;
  case ARM::t2STR_PRE:
  case ARM::STR_PRE_IMM:
    // A single pre-indexed store adjusts SP by exactly one word.
    return 4;
  default:
    llvm_unreachable("Unknown push or pop like instruction");
  }

  int count = 0;
  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
  // pred) so the list starts at 4.
  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
    count += RegSize;
  return count;
}
554
// Decide whether a Windows stack probe (__chkstk) is required for a frame of
// the given size.
// NOTE(review): the first signature line (declaring the MachineFunction
// parameter) was lost in extraction.
                                      size_t StackSizeInBytes) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function &F = MF.getFunction();
  // When a stack protector slot exists the threshold drops from 4096 to
  // 4080 — presumably to leave room for the guard; TODO confirm.
  unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;

  // The probe threshold can be overridden per-function via attribute.
  StackProbeSize =
      F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
  return (StackSizeInBytes >= StackProbeSize) &&
         !F.hasFnAttribute("no-stack-arg-probe");
}
566
namespace {

// Bookkeeping for SP-adjusting instructions emitted during the prologue, so
// that matching .cfi_def_cfa_offset directives can be emitted afterwards.
struct StackAdjustingInsts {
  struct InstInfo {
    // NOTE(review): the iterator member declaration was lost in extraction.
    unsigned SPAdjust; // Bytes this instruction moves SP by.
    bool BeforeFPSet;  // True if it executes before the FP is established.
  };

  // NOTE(review): the container member (Insts) declaration was lost in
  // extraction.

  // Record one SP-adjusting instruction and its adjustment size.
  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
               bool BeforeFPSet = false) {
    InstInfo Info = {I, SPAdjust, BeforeFPSet};
    Insts.push_back(Info);
  }

  // Fold extra bytes into a previously recorded instruction (used when an SP
  // update is merged into an existing push/pop).
  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
    auto Info =
        llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
    assert(Info != Insts.end() && "invalid sp adjusting instruction");
    Info->SPAdjust += ExtraBytes;
  }

  // Emit a CFI def_cfa_offset after each recorded instruction. Once a frame
  // pointer is set, later instructions need no further CFA updates.
  void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
                         const ARMBaseInstrInfo &TII, bool HasFP) {
    // NOTE(review): the line initializing MF (used below) was lost in
    // extraction.
    unsigned CFAOffset = 0;
    for (auto &Info : Insts) {
      if (HasFP && !Info.BeforeFPSet)
        return;

      CFAOffset += Info.SPAdjust;
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
      BuildMI(MBB, std::next(Info.I), dl,
              TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
      // NOTE(review): the trailing line of this BuildMI chain was lost in
      // extraction.
    }
  }
};

} // end anonymous namespace
611
/// Emit an instruction sequence that will align the address in
/// register Reg by zero-ing out the lower bits. For versions of the
/// architecture that support Neon, this must be done in a single
/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
/// single instruction. That function only gets called when optimizing
/// spilling of D registers on a core with the Neon instruction set
/// present.
// NOTE(review): the opening signature line, the MBB/MBBI parameter lines, and
// several operand/predicate lines inside the BuildMI chains below were lost
// in extraction.
                                     const TargetInstrInfo &TII,
                                     const DebugLoc &DL, const unsigned Reg,
                                     const Align Alignment,
                                     const bool MustBeSingleInstruction) {
  const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
  // BFC requires v6T2 or v7 per this check.
  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
  const unsigned AlignMask = Alignment.value() - 1U;
  const unsigned NrBitsToZero = Log2(Alignment);
  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
  if (!AFI->isThumbFunction()) {
    // if the BFC instruction is available, use that to zero the lower
    // bits:
    //   bfc Reg, #0, log2(Alignment)
    // otherwise use BIC, if the mask to zero the required number of bits
    // can be encoded in the bic immediate field
    //   bic Reg, Reg, Alignment-1
    // otherwise, emit
    //   lsr Reg, Reg, log2(Alignment)
    //   lsl Reg, Reg, log2(Alignment)
    if (CanUseBFC) {
      BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
          .addImm(~AlignMask)
    } else if (AlignMask <= 255) {
      BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
          .addImm(AlignMask)
          .add(condCodeOp());
    } else {
      assert(!MustBeSingleInstruction &&
             "Shouldn't call emitAligningInstructions demanding a single "
             "instruction to be emitted for large stack alignment for a target "
             "without BFC.");
      BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
          .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
          .add(condCodeOp());
      BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
          .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
          .add(condCodeOp());
    }
  } else {
    // Since this is only reached for Thumb-2 targets, the BFC instruction
    // should always be available.
    assert(CanUseBFC);
    BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
        .addImm(~AlignMask)
  }
}
678
679/// We need the offset of the frame pointer relative to other MachineFrameInfo
680/// offsets which are encoded relative to SP at function begin.
681/// See also emitPrologue() for how the FP is set up.
682/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
683/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
684/// this to produce a conservative estimate that we check in an assert() later.
685static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
686 const MachineFunction &MF) {
687 // For Thumb1, push.w isn't available, so the first push will always push
688 // r7 and lr onto the stack first.
689 if (AFI.isThumb1OnlyFunction())
690 return -AFI.getArgRegsSaveSize() - (2 * 4);
691 // This is a conservative estimation: Assume the frame pointer being r7 and
692 // pc("r15") up to r8 getting spilled before (= 8 registers).
693 int MaxRegBytes = 8 * 4;
694 if (STI.splitFramePointerPush(MF)) {
695 // Here, r11 can be stored below all of r4-r15 (3 registers more than
696 // above), plus d8-d15.
697 MaxRegBytes = 11 * 4 + 8 * 8;
698 }
699 int FPCXTSaveSize =
700 (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
701 return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
702}
703
705 MachineBasicBlock &MBB) const {
707 MachineFrameInfo &MFI = MF.getFrameInfo();
709 MachineModuleInfo &MMI = MF.getMMI();
710 MCContext &Context = MMI.getContext();
711 const TargetMachine &TM = MF.getTarget();
712 const MCRegisterInfo *MRI = Context.getRegisterInfo();
713 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
716 "This emitPrologue does not support Thumb1!");
717 bool isARM = !AFI->isThumbFunction();
718 Align Alignment = STI.getFrameLowering()->getStackAlign();
719 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
720 unsigned NumBytes = MFI.getStackSize();
721 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
722 int FPCXTSaveSize = 0;
723 bool NeedsWinCFI = needsWinCFI(MF);
724
725 // Debug location must be unknown since the first debug location is used
726 // to determine the end of the prologue.
727 DebugLoc dl;
728
729 Register FramePtr = RegInfo->getFrameRegister(MF);
730
731 // Determine the sizes of each callee-save spill areas and record which frame
732 // belongs to which callee-save spill areas.
733 unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
734 int FramePtrSpillFI = 0;
735 int D8SpillFI = 0;
736
737 // All calls are tail calls in GHC calling conv, and functions have no
738 // prologue/epilogue.
740 return;
741
742 StackAdjustingInsts DefCFAOffsetCandidates;
743 bool HasFP = hasFP(MF);
744
745 if (!AFI->hasStackFrame() &&
746 (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
747 if (NumBytes != 0) {
748 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
750 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
751 }
752 if (!NeedsWinCFI)
753 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
754 if (NeedsWinCFI && MBBI != MBB.begin()) {
756 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
758 MF.setHasWinCFI(true);
759 }
760 return;
761 }
762
763 // Determine spill area sizes.
764 if (STI.splitFramePointerPush(MF)) {
765 for (const CalleeSavedInfo &I : CSI) {
766 Register Reg = I.getReg();
767 int FI = I.getFrameIdx();
768 switch (Reg) {
769 case ARM::R11:
770 case ARM::LR:
771 if (Reg == FramePtr)
772 FramePtrSpillFI = FI;
773 GPRCS2Size += 4;
774 break;
775 case ARM::R0:
776 case ARM::R1:
777 case ARM::R2:
778 case ARM::R3:
779 case ARM::R4:
780 case ARM::R5:
781 case ARM::R6:
782 case ARM::R7:
783 case ARM::R8:
784 case ARM::R9:
785 case ARM::R10:
786 case ARM::R12:
787 GPRCS1Size += 4;
788 break;
789 case ARM::FPCXTNS:
790 FPCXTSaveSize = 4;
791 break;
792 default:
793 // This is a DPR. Exclude the aligned DPRCS2 spills.
794 if (Reg == ARM::D8)
795 D8SpillFI = FI;
796 if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
797 DPRCSSize += 8;
798 }
799 }
800 } else {
801 for (const CalleeSavedInfo &I : CSI) {
802 Register Reg = I.getReg();
803 int FI = I.getFrameIdx();
804 switch (Reg) {
805 case ARM::R8:
806 case ARM::R9:
807 case ARM::R10:
808 case ARM::R11:
809 case ARM::R12:
810 if (STI.splitFramePushPop(MF)) {
811 GPRCS2Size += 4;
812 break;
813 }
814 [[fallthrough]];
815 case ARM::R0:
816 case ARM::R1:
817 case ARM::R2:
818 case ARM::R3:
819 case ARM::R4:
820 case ARM::R5:
821 case ARM::R6:
822 case ARM::R7:
823 case ARM::LR:
824 if (Reg == FramePtr)
825 FramePtrSpillFI = FI;
826 GPRCS1Size += 4;
827 break;
828 case ARM::FPCXTNS:
829 FPCXTSaveSize = 4;
830 break;
831 default:
832 // This is a DPR. Exclude the aligned DPRCS2 spills.
833 if (Reg == ARM::D8)
834 D8SpillFI = FI;
835 if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
836 DPRCSSize += 8;
837 }
838 }
839 }
840
841 MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
842
843 // Move past the PAC computation.
844 if (AFI->shouldSignReturnAddress())
845 LastPush = MBBI++;
846
847 // Move past FPCXT area.
848 if (FPCXTSaveSize > 0) {
849 LastPush = MBBI++;
850 DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
851 }
852
853 // Allocate the vararg register save area.
854 if (ArgRegsSaveSize) {
855 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
857 LastPush = std::prev(MBBI);
858 DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
859 }
860
861 // Move past area 1.
862 if (GPRCS1Size > 0) {
863 GPRCS1Push = LastPush = MBBI++;
864 DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
865 }
866
867 // Determine starting offsets of spill areas.
868 unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
869 unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
870 unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
871 Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
872 unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
873 if (!STI.splitFramePointerPush(MF)) {
874 DPRGapSize += GPRCS2Size;
875 }
876 DPRGapSize %= DPRAlign.value();
877
878 unsigned DPRCSOffset;
879 if (STI.splitFramePointerPush(MF)) {
880 DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
881 GPRCS2Offset = DPRCSOffset - GPRCS2Size;
882 } else {
883 DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
884 }
885 int FramePtrOffsetInPush = 0;
886 if (HasFP) {
887 int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
888 assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
889 "Max FP estimation is wrong");
890 FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
891 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
892 NumBytes);
893 }
894 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
895 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
896 AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
897
898 // Move past area 2.
899 if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
900 GPRCS2Push = LastPush = MBBI++;
901 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
902 }
903
904 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
905 // .cfi_offset operations will reflect that.
906 if (DPRGapSize) {
907 assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
908 if (LastPush != MBB.end() &&
909 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
910 DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
911 else {
912 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
914 DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
915 }
916 }
917
918 // Move past area 3.
919 if (DPRCSSize > 0) {
920 // Since vpush register list cannot have gaps, there may be multiple vpush
921 // instructions in the prologue.
922 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
923 DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
924 LastPush = MBBI++;
925 }
926 }
927
928 // Move past the aligned DPRCS2 area.
929 if (AFI->getNumAlignedDPRCS2Regs() > 0) {
931 // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
932 // leaves the stack pointer pointing to the DPRCS2 area.
933 //
934 // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
935 NumBytes += MFI.getObjectOffset(D8SpillFI);
936 } else
937 NumBytes = DPRCSOffset;
938
939 if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
940 GPRCS2Push = LastPush = MBBI++;
941 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
942 }
943
944 bool NeedsWinCFIStackAlloc = NeedsWinCFI;
945 if (STI.splitFramePointerPush(MF) && HasFP)
946 NeedsWinCFIStackAlloc = false;
947
948 if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
949 uint32_t NumWords = NumBytes >> 2;
950
951 if (NumWords < 65536) {
952 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
953 .addImm(NumWords)
956 } else {
957 // Split into two instructions here, instead of using t2MOVi32imm,
958 // to allow inserting accurate SEH instructions (including accurate
959 // instruction size for each of them).
960 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
961 .addImm(NumWords & 0xffff)
964 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
965 .addReg(ARM::R4)
966 .addImm(NumWords >> 16)
969 }
970
971 switch (TM.getCodeModel()) {
972 case CodeModel::Tiny:
973 llvm_unreachable("Tiny code model not available on ARM.");
974 case CodeModel::Small:
977 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
979 .addExternalSymbol("__chkstk")
980 .addReg(ARM::R4, RegState::Implicit)
982 break;
983 case CodeModel::Large:
984 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
985 .addExternalSymbol("__chkstk")
987
988 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
990 .addReg(ARM::R12, RegState::Kill)
991 .addReg(ARM::R4, RegState::Implicit)
993 break;
994 }
995
996 MachineInstrBuilder Instr, SEH;
997 Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
998 .addReg(ARM::SP, RegState::Kill)
999 .addReg(ARM::R4, RegState::Kill)
1002 .add(condCodeOp());
1003 if (NeedsWinCFIStackAlloc) {
1004 SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
1005 .addImm(NumBytes)
1006 .addImm(/*Wide=*/1)
1008 MBB.insertAfter(Instr, SEH);
1009 }
1010 NumBytes = 0;
1011 }
1012
1013 if (NumBytes) {
1014 // Adjust SP after all the callee-save spills.
1015 if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
1016 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
1017 DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
1018 else {
1019 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
1021 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
1022 }
1023
1024 if (HasFP && isARM)
1025 // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
1026 // Note it's not safe to do this in Thumb2 mode because it would have
1027 // taken two instructions:
1028 // mov sp, r7
1029 // sub sp, #24
1030 // If an interrupt is taken between the two instructions, then sp is in
1031 // an inconsistent state (pointing to the middle of callee-saved area).
1032 // The interrupt handler can end up clobbering the registers.
1033 AFI->setShouldRestoreSPFromFP(true);
1034 }
1035
1036 // Set FP to point to the stack slot that contains the previous FP.
1037 // For iOS, FP is R7, which has now been stored in spill area 1.
1038 // Otherwise, if this is not iOS, all the callee-saved registers go
1039 // into spill area 1, including the FP in R11. In either case, it
1040 // is in area one and the adjustment needs to take place just after
1041 // that push.
1042 // FIXME: The above is not necessarily true when PACBTI is enabled.
1043 // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
1044 // so FP ends up on area two.
1046 if (HasFP) {
1047 AfterPush = std::next(GPRCS1Push);
1048 unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
1049 int FPOffset = PushSize + FramePtrOffsetInPush;
1050 if (STI.splitFramePointerPush(MF)) {
1051 AfterPush = std::next(GPRCS2Push);
1052 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1053 FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
1054 } else {
1055 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1056 FramePtr, ARM::SP, FPOffset,
1058 }
1059 if (!NeedsWinCFI) {
1060 if (FramePtrOffsetInPush + PushSize != 0) {
1061 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
1062 nullptr, MRI->getDwarfRegNum(FramePtr, true),
1063 FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
1064 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1065 .addCFIIndex(CFIIndex)
1067 } else {
1068 unsigned CFIIndex =
1070 nullptr, MRI->getDwarfRegNum(FramePtr, true)));
1071 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1072 .addCFIIndex(CFIIndex)
1074 }
1075 }
1076 }
1077
1078 // Emit a SEH opcode indicating the prologue end. The rest of the prologue
1079 // instructions below don't need to be replayed to unwind the stack.
1080 if (NeedsWinCFI && MBBI != MBB.begin()) {
1082 if (HasFP && STI.splitFramePointerPush(MF))
1083 End = AfterPush;
1085 BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
1087 MF.setHasWinCFI(true);
1088 }
1089
1090 // Now that the prologue's actual instructions are finalised, we can insert
1091 // the necessary DWARF cf instructions to describe the situation. Start by
1092 // recording where each register ended up:
1093 if (GPRCS1Size > 0 && !NeedsWinCFI) {
1094 MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
1095 int CFIIndex;
1096 for (const auto &Entry : CSI) {
1097 Register Reg = Entry.getReg();
1098 int FI = Entry.getFrameIdx();
1099 switch (Reg) {
1100 case ARM::R8:
1101 case ARM::R9:
1102 case ARM::R10:
1103 case ARM::R11:
1104 case ARM::R12:
1105 if (STI.splitFramePushPop(MF))
1106 break;
1107 [[fallthrough]];
1108 case ARM::R0:
1109 case ARM::R1:
1110 case ARM::R2:
1111 case ARM::R3:
1112 case ARM::R4:
1113 case ARM::R5:
1114 case ARM::R6:
1115 case ARM::R7:
1116 case ARM::LR:
1118 nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
1119 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1120 .addCFIIndex(CFIIndex)
1122 break;
1123 }
1124 }
1125 }
1126
1127 if (GPRCS2Size > 0 && !NeedsWinCFI) {
1128 MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
1129 for (const auto &Entry : CSI) {
1130 Register Reg = Entry.getReg();
1131 int FI = Entry.getFrameIdx();
1132 switch (Reg) {
1133 case ARM::R8:
1134 case ARM::R9:
1135 case ARM::R10:
1136 case ARM::R11:
1137 case ARM::R12:
1138 if (STI.splitFramePushPop(MF)) {
1139 unsigned DwarfReg = MRI->getDwarfRegNum(
1140 Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
1141 unsigned Offset = MFI.getObjectOffset(FI);
1142 unsigned CFIIndex = MF.addFrameInst(
1143 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1144 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1145 .addCFIIndex(CFIIndex)
1147 }
1148 break;
1149 }
1150 }
1151 }
1152
1153 if (DPRCSSize > 0 && !NeedsWinCFI) {
1154 // Since vpush register list cannot have gaps, there may be multiple vpush
1155 // instructions in the prologue.
1156 MachineBasicBlock::iterator Pos = std::next(LastPush);
1157 for (const auto &Entry : CSI) {
1158 Register Reg = Entry.getReg();
1159 int FI = Entry.getFrameIdx();
1160 if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
1161 (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
1162 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
1163 unsigned Offset = MFI.getObjectOffset(FI);
1164 unsigned CFIIndex = MF.addFrameInst(
1165 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1166 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1167 .addCFIIndex(CFIIndex)
1169 }
1170 }
1171 }
1172
1173 // Now we can emit descriptions of where the canonical frame address was
1174 // throughout the process. If we have a frame pointer, it takes over the job
1175 // half-way through, so only the first few .cfi_def_cfa_offset instructions
1176 // actually get emitted.
1177 if (!NeedsWinCFI)
1178 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
1179
1180 if (STI.isTargetELF() && hasFP(MF))
1182 AFI->getFramePtrSpillOffset());
1183
1184 AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
1185 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
1186 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
1187 AFI->setDPRCalleeSavedGapSize(DPRGapSize);
1188 AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
1189
1190 // If we need dynamic stack realignment, do it here. Be paranoid and make
1191 // sure if we also have VLAs, we have a base pointer for frame access.
1192 // If aligned NEON registers were spilled, the stack has already been
1193 // realigned.
1194 if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
1195 Align MaxAlign = MFI.getMaxAlign();
1197 if (!AFI->isThumbFunction()) {
1198 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
1199 false);
1200 } else {
1201 // We cannot use sp as source/dest register here, thus we're using r4 to
1202 // perform the calculations. We're emitting the following sequence:
1203 // mov r4, sp
1204 // -- use emitAligningInstructions to produce best sequence to zero
1205 // -- out lower bits in r4
1206 // mov sp, r4
1207 // FIXME: It will be better just to find spare register here.
1208 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
1209 .addReg(ARM::SP, RegState::Kill)
1211 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
1212 false);
1213 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1214 .addReg(ARM::R4, RegState::Kill)
1216 }
1217
1218 AFI->setShouldRestoreSPFromFP(true);
1219 }
1220
1221 // If we need a base pointer, set it up here. It's whatever the value
1222 // of the stack pointer is at this point. Any variable size objects
1223 // will be allocated after this, so we can still use the base pointer
1224 // to reference locals.
1225 // FIXME: Clarify FrameSetup flags here.
1226 if (RegInfo->hasBasePointer(MF)) {
1227 if (isARM)
1228 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
1229 .addReg(ARM::SP)
1231 .add(condCodeOp());
1232 else
1233 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
1234 .addReg(ARM::SP)
1236 }
1237
1238 // If the frame has variable sized objects then the epilogue must restore
1239 // the sp from fp. We can assume there's an FP here since hasFP already
1240 // checks for hasVarSizedObjects.
1241 if (MFI.hasVarSizedObjects())
1242 AFI->setShouldRestoreSPFromFP(true);
1243}
1244
1246 MachineBasicBlock &MBB) const {
1247 MachineFrameInfo &MFI = MF.getFrameInfo();
1249 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
1250 const ARMBaseInstrInfo &TII =
1251 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1252 assert(!AFI->isThumb1OnlyFunction() &&
1253 "This emitEpilogue does not support Thumb1!");
1254 bool isARM = !AFI->isThumbFunction();
1255
1256 // Amount of stack space we reserved next to incoming args for either
1257 // varargs registers or stack arguments in tail calls made by this function.
1258 unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
1259
1260 // How much of the stack used by incoming arguments this function is expected
1261 // to restore in this particular epilogue.
1262 int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
1263 int NumBytes = (int)MFI.getStackSize();
1264 Register FramePtr = RegInfo->getFrameRegister(MF);
1265
1266 // All calls are tail calls in GHC calling conv, and functions have no
1267 // prologue/epilogue.
1269 return;
1270
1271 // First put ourselves on the first (from top) terminator instructions.
1273 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
1274
1275 MachineBasicBlock::iterator RangeStart;
1276 if (!AFI->hasStackFrame()) {
1277 if (MF.hasWinCFI()) {
1278 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1280 RangeStart = initMBBRange(MBB, MBBI);
1281 }
1282
1283 if (NumBytes + IncomingArgStackToRestore != 0)
1284 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1285 NumBytes + IncomingArgStackToRestore,
1287 } else {
1288 // Unwind MBBI to point to first LDR / VLDRD.
1289 if (MBBI != MBB.begin()) {
1290 do {
1291 --MBBI;
1292 } while (MBBI != MBB.begin() &&
1294 if (!MBBI->getFlag(MachineInstr::FrameDestroy))
1295 ++MBBI;
1296 }
1297
1298 if (MF.hasWinCFI()) {
1299 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1301 RangeStart = initMBBRange(MBB, MBBI);
1302 }
1303
1304 // Move SP to start of FP callee save spill area.
1305 NumBytes -= (ReservedArgStack +
1306 AFI->getFPCXTSaveAreaSize() +
1311
1312 // Reset SP based on frame pointer only if the stack frame extends beyond
1313 // frame pointer stack slot or target is ELF and the function has FP.
1314 if (AFI->shouldRestoreSPFromFP()) {
1315 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
1316 if (NumBytes) {
1317 if (isARM)
1318 emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
1319 ARMCC::AL, 0, TII,
1321 else {
1322 // It's not possible to restore SP from FP in a single instruction.
1323 // For iOS, this looks like:
1324 // mov sp, r7
1325 // sub sp, #24
1326 // This is bad, if an interrupt is taken after the mov, sp is in an
1327 // inconsistent state.
1328 // Use the first callee-saved register as a scratch register.
1329 assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
1330 "No scratch register to restore SP from FP!");
1331 emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
1333 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1334 .addReg(ARM::R4)
1337 }
1338 } else {
1339 // Thumb2 or ARM.
1340 if (isARM)
1341 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
1344 .add(condCodeOp())
1346 else
1347 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1351 }
1352 } else if (NumBytes &&
1353 !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
1354 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
1356
1357 // Increment past our save areas.
1359 MBBI++;
1360
1361 if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
1362 MBBI++;
1363 // Since vpop register list cannot have gaps, there may be multiple vpop
1364 // instructions in the epilogue.
1365 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
1366 MBBI++;
1367 }
1368 if (AFI->getDPRCalleeSavedGapSize()) {
1369 assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
1370 "unexpected DPR alignment gap");
1371 emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
1373 }
1374
1376 MBBI++;
1377 if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
1378
1379 if (ReservedArgStack || IncomingArgStackToRestore) {
1380 assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
1381 "attempting to restore negative stack amount");
1382 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1383 ReservedArgStack + IncomingArgStackToRestore,
1385 }
1386
1387 // Validate PAC, It should have been already popped into R12. For CMSE entry
1388 // function, the validation instruction is emitted during expansion of the
1389 // tBXNS_RET, since the validation must use the value of SP at function
1390 // entry, before saving, resp. after restoring, FPCXTNS.
1391 if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
1392 BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
1393 }
1394
1395 if (MF.hasWinCFI()) {
1397 BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
1399 }
1400}
1401
1402/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1403/// debug info. It's the same as what we use for resolving the code-gen
1404/// references for now. FIXME: This can go wrong when references are
1405/// SP-relative and simple call frames aren't used.
1407 int FI,
1408 Register &FrameReg) const {
1409 return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
1410}
1411
1413 int FI, Register &FrameReg,
1414 int SPAdj) const {
1415 const MachineFrameInfo &MFI = MF.getFrameInfo();
1416 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1418 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1419 int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
1420 int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1421 bool isFixed = MFI.isFixedObjectIndex(FI);
1422
1423 FrameReg = ARM::SP;
1424 Offset += SPAdj;
1425
1426 // SP can move around if there are allocas. We may also lose track of SP
1427 // when emergency spilling inside a non-reserved call frame setup.
1428 bool hasMovingSP = !hasReservedCallFrame(MF);
1429
1430 // When dynamically realigning the stack, use the frame pointer for
1431 // parameters, and the stack/base pointer for locals.
1432 if (RegInfo->hasStackRealignment(MF)) {
1433 assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1434 if (isFixed) {
1435 FrameReg = RegInfo->getFrameRegister(MF);
1436 Offset = FPOffset;
1437 } else if (hasMovingSP) {
1438 assert(RegInfo->hasBasePointer(MF) &&
1439 "VLAs and dynamic stack alignment, but missing base pointer!");
1440 FrameReg = RegInfo->getBaseRegister();
1441 Offset -= SPAdj;
1442 }
1443 return Offset;
1444 }
1445
1446 // If there is a frame pointer, use it when we can.
1447 if (hasFP(MF) && AFI->hasStackFrame()) {
1448 // Use frame pointer to reference fixed objects. Use it for locals if
1449 // there are VLAs (and thus the SP isn't reliable as a base).
1450 if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1451 FrameReg = RegInfo->getFrameRegister(MF);
1452 return FPOffset;
1453 } else if (hasMovingSP) {
1454 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
1455 if (AFI->isThumb2Function()) {
1456 // Try to use the frame pointer if we can, else use the base pointer
1457 // since it's available. This is handy for the emergency spill slot, in
1458 // particular.
1459 if (FPOffset >= -255 && FPOffset < 0) {
1460 FrameReg = RegInfo->getFrameRegister(MF);
1461 return FPOffset;
1462 }
1463 }
1464 } else if (AFI->isThumbFunction()) {
1465 // Prefer SP to base pointer, if the offset is suitably aligned and in
1466 // range as the effective range of the immediate offset is bigger when
1467 // basing off SP.
1468 // Use add <rd>, sp, #<imm8>
1469 // ldr <rd>, [sp, #<imm8>]
1470 if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1471 return Offset;
1472 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1473 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1474 if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1475 FrameReg = RegInfo->getFrameRegister(MF);
1476 return FPOffset;
1477 }
1478 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1479 // Otherwise, use SP or FP, whichever is closer to the stack slot.
1480 FrameReg = RegInfo->getFrameRegister(MF);
1481 return FPOffset;
1482 }
1483 }
1484 // Use the base pointer if we have one.
1485 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1486 // That can happen if we forced a base pointer for a large call frame.
1487 if (RegInfo->hasBasePointer(MF)) {
1488 FrameReg = RegInfo->getBaseRegister();
1489 Offset -= SPAdj;
1490 }
1491 return Offset;
1492}
1493
1494void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
1497 unsigned StmOpc, unsigned StrOpc,
1498 bool NoGap, bool (*Func)(unsigned, bool),
1499 unsigned NumAlignedDPRCS2Regs,
1500 unsigned MIFlags) const {
1501 MachineFunction &MF = *MBB.getParent();
1504
1505 DebugLoc DL;
1506
1507 using RegAndKill = std::pair<unsigned, bool>;
1508
1510 unsigned i = CSI.size();
1511 while (i != 0) {
1512 unsigned LastReg = 0;
1513 for (; i != 0; --i) {
1514 Register Reg = CSI[i-1].getReg();
1515 if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1516
1517 // D-registers in the aligned area DPRCS2 are NOT spilled here.
1518 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1519 continue;
1520
1521 const MachineRegisterInfo &MRI = MF.getRegInfo();
1522 bool isLiveIn = MRI.isLiveIn(Reg);
1523 if (!isLiveIn && !MRI.isReserved(Reg))
1524 MBB.addLiveIn(Reg);
1525 // If NoGap is true, push consecutive registers and then leave the rest
1526 // for other instructions. e.g.
1527 // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
1528 if (NoGap && LastReg && LastReg != Reg-1)
1529 break;
1530 LastReg = Reg;
1531 // Do not set a kill flag on values that are also marked as live-in. This
1532 // happens with the @llvm-returnaddress intrinsic and with arguments
1533 // passed in callee saved registers.
1534 // Omitting the kill flags is conservatively correct even if the live-in
1535 // is not used after all.
1536 Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1537 }
1538
1539 if (Regs.empty())
1540 continue;
1541
1542 llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1543 return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1544 });
1545
1546 if (Regs.size() > 1 || StrOpc== 0) {
1547 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1548 .addReg(ARM::SP)
1549 .setMIFlags(MIFlags)
1551 for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1552 MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1553 } else if (Regs.size() == 1) {
1554 BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1555 .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1556 .addReg(ARM::SP)
1557 .setMIFlags(MIFlags)
1558 .addImm(-4)
1560 }
1561 Regs.clear();
1562
1563 // Put any subsequent vpush instructions before this one: they will refer to
1564 // higher register numbers so need to be pushed first in order to preserve
1565 // monotonicity.
1566 if (MI != MBB.begin())
1567 --MI;
1568 }
1569}
1570
1571void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1574 unsigned LdmOpc, unsigned LdrOpc,
1575 bool isVarArg, bool NoGap,
1576 bool (*Func)(unsigned, bool),
1577 unsigned NumAlignedDPRCS2Regs) const {
1578 MachineFunction &MF = *MBB.getParent();
1582 bool hasPAC = AFI->shouldSignReturnAddress();
1583 DebugLoc DL;
1584 bool isTailCall = false;
1585 bool isInterrupt = false;
1586 bool isTrap = false;
1587 bool isCmseEntry = false;
1588 if (MBB.end() != MI) {
1589 DL = MI->getDebugLoc();
1590 unsigned RetOpcode = MI->getOpcode();
1591 isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
1592 isInterrupt =
1593 RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1594 isTrap =
1595 RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1596 RetOpcode == ARM::tTRAP;
1597 isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
1598 }
1599
1601 unsigned i = CSI.size();
1602 while (i != 0) {
1603 unsigned LastReg = 0;
1604 bool DeleteRet = false;
1605 for (; i != 0; --i) {
1606 CalleeSavedInfo &Info = CSI[i-1];
1607 Register Reg = Info.getReg();
1608 if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1609
1610 // The aligned reloads from area DPRCS2 are not inserted here.
1611 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1612 continue;
1613 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1614 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
1615 STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1617 Reg = ARM::PC;
1618 // Fold the return instruction into the LDM.
1619 DeleteRet = true;
1620 LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1621 // We 'restore' LR into PC so it is not live out of the return block:
1622 // Clear Restored bit.
1623 Info.setRestored(false);
1624 }
1625
1626 // If NoGap is true, pop consecutive registers and then leave the rest
1627 // for other instructions. e.g.
1628 // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1629 if (NoGap && LastReg && LastReg != Reg-1)
1630 break;
1631
1632 LastReg = Reg;
1633 Regs.push_back(Reg);
1634 }
1635
1636 if (Regs.empty())
1637 continue;
1638
1639 llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1640 return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1641 });
1642
1643 if (Regs.size() > 1 || LdrOpc == 0) {
1644 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1645 .addReg(ARM::SP)
1648 for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1649 MIB.addReg(Regs[i], getDefRegState(true));
1650 if (DeleteRet) {
1651 if (MI != MBB.end()) {
1652 MIB.copyImplicitOps(*MI);
1653 MI->eraseFromParent();
1654 }
1655 }
1656 MI = MIB;
1657 } else if (Regs.size() == 1) {
1658 // If we adjusted the reg to PC from LR above, switch it back here. We
1659 // only do that for LDM.
1660 if (Regs[0] == ARM::PC)
1661 Regs[0] = ARM::LR;
1663 BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1664 .addReg(ARM::SP, RegState::Define)
1665 .addReg(ARM::SP)
1667 // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1668 // that refactoring is complete (eventually).
1669 if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1670 MIB.addReg(0);
1672 } else
1673 MIB.addImm(4);
1674 MIB.add(predOps(ARMCC::AL));
1675 }
1676 Regs.clear();
1677
1678 // Put any subsequent vpop instructions after this one: they will refer to
1679 // higher register numbers so need to be popped afterwards.
1680 if (MI != MBB.end())
1681 ++MI;
1682 }
1683}
1684
1685/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1686/// starting from d8. Also insert stack realignment code and leave the stack
1687/// pointer pointing to the d8 spill slot.
1690 unsigned NumAlignedDPRCS2Regs,
1692 const TargetRegisterInfo *TRI) {
1693 MachineFunction &MF = *MBB.getParent();
1695 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1697 MachineFrameInfo &MFI = MF.getFrameInfo();
1698
1699 // Mark the D-register spill slots as properly aligned. Since MFI computes
1700 // stack slot layout backwards, this can actually mean that the d-reg stack
1701 // slot offsets can be wrong. The offset for d8 will always be correct.
1702 for (const CalleeSavedInfo &I : CSI) {
1703 unsigned DNum = I.getReg() - ARM::D8;
1704 if (DNum > NumAlignedDPRCS2Regs - 1)
1705 continue;
1706 int FI = I.getFrameIdx();
1707 // The even-numbered registers will be 16-byte aligned, the odd-numbered
1708 // registers will be 8-byte aligned.
1709 MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
1710
1711 // The stack slot for D8 needs to be maximally aligned because this is
1712 // actually the point where we align the stack pointer. MachineFrameInfo
1713 // computes all offsets relative to the incoming stack pointer which is a
1714 // bit weird when realigning the stack. Any extra padding for this
1715 // over-alignment is not realized because the code inserted below adjusts
1716 // the stack pointer by numregs * 8 before aligning the stack pointer.
1717 if (DNum == 0)
1718 MFI.setObjectAlignment(FI, MFI.getMaxAlign());
1719 }
1720
1721 // Move the stack pointer to the d8 spill slot, and align it at the same
1722 // time. Leave the stack slot address in the scratch register r4.
1723 //
1724 // sub r4, sp, #numregs * 8
1725 // bic r4, r4, #align - 1
1726 // mov sp, r4
1727 //
1728 bool isThumb = AFI->isThumbFunction();
1729 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1730 AFI->setShouldRestoreSPFromFP(true);
1731
1732 // sub r4, sp, #numregs * 8
1733 // The immediate is <= 64, so it doesn't need any special encoding.
1734 unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1735 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1736 .addReg(ARM::SP)
1737 .addImm(8 * NumAlignedDPRCS2Regs)
1739 .add(condCodeOp());
1740
1741 Align MaxAlign = MF.getFrameInfo().getMaxAlign();
1742 // We must set parameter MustBeSingleInstruction to true, since
1743 // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1744 // stack alignment. Luckily, this can always be done since all ARM
1745 // architecture versions that support Neon also support the BFC
1746 // instruction.
1747 emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1748
1749 // mov sp, r4
1750 // The stack pointer must be adjusted before spilling anything, otherwise
1751 // the stack slots could be clobbered by an interrupt handler.
1752 // Leave r4 live, it is used below.
1753 Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1754 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1755 .addReg(ARM::R4)
1757 if (!isThumb)
1758 MIB.add(condCodeOp());
1759
1760 // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1761 // r4 holds the stack slot address.
1762 unsigned NextReg = ARM::D8;
1763
1764 // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1765 // The writeback is only needed when emitting two vst1.64 instructions.
1766 if (NumAlignedDPRCS2Regs >= 6) {
1767 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1768 &ARM::QQPRRegClass);
1769 MBB.addLiveIn(SupReg);
1770 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1771 .addReg(ARM::R4, RegState::Kill)
1772 .addImm(16)
1773 .addReg(NextReg)
1776 NextReg += 4;
1777 NumAlignedDPRCS2Regs -= 4;
1778 }
1779
1780 // We won't modify r4 beyond this point. It currently points to the next
1781 // register to be spilled.
1782 unsigned R4BaseReg = NextReg;
1783
1784 // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1785 if (NumAlignedDPRCS2Regs >= 4) {
1786 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1787 &ARM::QQPRRegClass);
1788 MBB.addLiveIn(SupReg);
1789 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1790 .addReg(ARM::R4)
1791 .addImm(16)
1792 .addReg(NextReg)
1795 NextReg += 4;
1796 NumAlignedDPRCS2Regs -= 4;
1797 }
1798
1799 // 16-byte aligned vst1.64 with 2 d-regs.
1800 if (NumAlignedDPRCS2Regs >= 2) {
1801 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1802 &ARM::QPRRegClass);
1803 MBB.addLiveIn(SupReg);
1804 BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1805 .addReg(ARM::R4)
1806 .addImm(16)
1807 .addReg(SupReg)
1809 NextReg += 2;
1810 NumAlignedDPRCS2Regs -= 2;
1811 }
1812
1813 // Finally, use a vanilla vstr.64 for the odd last register.
1814 if (NumAlignedDPRCS2Regs) {
1815 MBB.addLiveIn(NextReg);
1816 // vstr.64 uses addrmode5 which has an offset scale of 4.
1817 BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1818 .addReg(NextReg)
1819 .addReg(ARM::R4)
1820 .addImm((NextReg - R4BaseReg) * 2)
1822 }
1823
1824 // The last spill instruction inserted should kill the scratch register r4.
1825 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1826}
1827
1828/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1829/// iterator to the following instruction.
1832 unsigned NumAlignedDPRCS2Regs) {
1833 // sub r4, sp, #numregs * 8
1834 // bic r4, r4, #align - 1
1835 // mov sp, r4
1836 ++MI; ++MI; ++MI;
1837 assert(MI->mayStore() && "Expecting spill instruction");
1838
1839 // These switches all fall through.
1840 switch(NumAlignedDPRCS2Regs) {
1841 case 7:
1842 ++MI;
1843 assert(MI->mayStore() && "Expecting spill instruction");
1844 [[fallthrough]];
1845 default:
1846 ++MI;
1847 assert(MI->mayStore() && "Expecting spill instruction");
1848 [[fallthrough]];
1849 case 1:
1850 case 2:
1851 case 4:
1852 assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1853 ++MI;
1854 }
1855 return MI;
1856}
1857
1858/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1859/// starting from d8. These instructions are assumed to execute while the
1860/// stack is still aligned, unlike the code inserted by emitPopInst.
1863 unsigned NumAlignedDPRCS2Regs,
1865 const TargetRegisterInfo *TRI) {
1866 MachineFunction &MF = *MBB.getParent();
1868 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1870
1871 // Find the frame index assigned to d8.
1872 int D8SpillFI = 0;
1873 for (const CalleeSavedInfo &I : CSI)
1874 if (I.getReg() == ARM::D8) {
1875 D8SpillFI = I.getFrameIdx();
1876 break;
1877 }
1878
1879 // Materialize the address of the d8 spill slot into the scratch register r4.
1880 // This can be fairly complicated if the stack frame is large, so just use
1881 // the normal frame index elimination mechanism to do it. This code runs as
1882 // the initial part of the epilog where the stack and base pointers haven't
1883 // been changed yet.
1884 bool isThumb = AFI->isThumbFunction();
1885 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1886
1887 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1888 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1889 .addFrameIndex(D8SpillFI)
1890 .addImm(0)
1892 .add(condCodeOp());
1893
1894 // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1895 unsigned NextReg = ARM::D8;
1896
1897 // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1898 if (NumAlignedDPRCS2Regs >= 6) {
1899 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1900 &ARM::QQPRRegClass);
1901 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1902 .addReg(ARM::R4, RegState::Define)
1903 .addReg(ARM::R4, RegState::Kill)
1904 .addImm(16)
1907 NextReg += 4;
1908 NumAlignedDPRCS2Regs -= 4;
1909 }
1910
1911 // We won't modify r4 beyond this point. It currently points to the next
1912 // register to be spilled.
1913 unsigned R4BaseReg = NextReg;
1914
1915 // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1916 if (NumAlignedDPRCS2Regs >= 4) {
1917 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1918 &ARM::QQPRRegClass);
1919 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1920 .addReg(ARM::R4)
1921 .addImm(16)
1924 NextReg += 4;
1925 NumAlignedDPRCS2Regs -= 4;
1926 }
1927
1928 // 16-byte aligned vld1.64 with 2 d-regs.
1929 if (NumAlignedDPRCS2Regs >= 2) {
1930 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1931 &ARM::QPRRegClass);
1932 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1933 .addReg(ARM::R4)
1934 .addImm(16)
1936 NextReg += 2;
1937 NumAlignedDPRCS2Regs -= 2;
1938 }
1939
1940 // Finally, use a vanilla vldr.64 for the remaining odd register.
1941 if (NumAlignedDPRCS2Regs)
1942 BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1943 .addReg(ARM::R4)
1944 .addImm(2 * (NextReg - R4BaseReg))
1946
1947 // Last store kills r4.
1948 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1949}
1950
// Body of ARMFrameLowering::spillCalleeSavedRegisters (signature elided in
// this view). Emits the prologue saves for the callee-saved registers listed
// in CSI: GPR areas via STMDB/STR_PRE pushes, D-registers via VSTMDDB_UPD,
// plus (when required) the PAC computation and the non-secure FP context
// save. Returns true to tell PEI the target handled the spills itself.
// NOTE(review): several continuation lines are missing from this extract;
// the visible code is kept byte-for-byte.
1954 if (CSI.empty())
1955 return false;
1956
1957 MachineFunction &MF = *MBB.getParent();
1959
// Thumb functions use the t2 push/store forms; ARM uses STMDB/STR_PRE_IMM.
1960 unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1961 unsigned PushOneOpc = AFI->isThumbFunction() ?
1962 ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1963 unsigned FltOpc = ARM::VSTMDDB_UPD;
1964 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1965 // Compute PAC in R12.
1966 if (AFI->shouldSignReturnAddress()) {
1967 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
1969 }
1970 // Save the non-secure floating point context.
1971 if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
1972 return C.getReg() == ARM::FPCXTNS;
1973 })) {
// Pre-decrement store of FPCXTNS: SP is moved down by 4 as part of the save.
1974 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
1975 ARM::SP)
1976 .addReg(ARM::SP)
1977 .addImm(-4)
1979 }
// When the subtarget splits the frame-pointer push, the GPR pushes are
// emitted in two groups with the D-register save between them; otherwise
// areas 1, 2 and 3 are pushed in order.
1980 if (STI.splitFramePointerPush(MF)) {
1981 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
1983 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
1984 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1985 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
1987 } else {
1988 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
1990 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
1992 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
1993 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1994 }
1995
1996 // The code above does not insert spill code for the aligned DPRCS2 registers.
1997 // The stack realignment code will be inserted between the push instructions
1998 // and these spills.
1999 if (NumAlignedDPRCS2Regs)
2000 emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2001
2002 return true;
2003}
2004
// Body of ARMFrameLowering::restoreCalleeSavedRegisters (signature elided in
// this view). Mirrors spillCalleeSavedRegisters: reloads the aligned DPRCS2
// registers first, then pops areas in the reverse of the push order.
// Returns true to tell PEI the target handled the restores itself.
2008 if (CSI.empty())
2009 return false;
2010
2011 MachineFunction &MF = *MBB.getParent();
2013 bool isVarArg = AFI->getArgRegsSaveSize() > 0;
2014 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2015
2016 // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
2017 // registers. Do that here instead.
2018 if (NumAlignedDPRCS2Regs)
2019 emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2020
// Thumb functions use the t2 pop/load forms; ARM uses LDMIA/LDR_POST_IMM.
2021 unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
2022 unsigned LdrOpc =
2023 AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
2024 unsigned FltOpc = ARM::VLDMDIA_UPD;
// Pop order is the reverse of the push order used in the prologue, with a
// different grouping when the frame-pointer push was split.
2025 if (STI.splitFramePointerPush(MF)) {
2026 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2028 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2029 NumAlignedDPRCS2Regs);
2030 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2032 } else {
2033 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2034 NumAlignedDPRCS2Regs);
2035 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2036 &isARMArea2Register, 0);
2037 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2038 &isARMArea1Register, 0);
2039 }
2040
2041 return true;
2042}
2043
2044// FIXME: Make generic?
// Estimate the code size of the function in bytes: the sum of the encoded
// size of every instruction, plus 4 bytes per jump-table entry and 4 bytes
// per constant-pool entry. Used below to decide whether Thumb1 far jumps
// need LR spilled. (First signature line elided in this view.)
2046 const ARMBaseInstrInfo &TII) {
2047 unsigned FnSize = 0;
2048 for (auto &MBB : MF) {
2049 for (auto &MI : MBB)
2050 FnSize += TII.getInstSizeInBytes(MI);
2051 }
// Jump tables and constant pools are emitted as data; assume 4 bytes per
// entry when sizing them.
2052 if (MF.getJumpTableInfo())
2053 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2054 FnSize += Table.MBBs.size() * 4;
2055 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2056 return FnSize;
2057}
2058
2059/// estimateRSStackSizeLimit - Look at each instruction that references stack
2060/// frames and return the stack size limit beyond which some of these
2061/// instructions will require a scratch register during their expansion later.
2062// FIXME: Move to TII?
// Walk every frame-index operand in the function and shrink Limit to the
// smallest immediate-offset range among the addressing modes that touch the
// stack. Starts from the 12-bit ARM immediate range; returns 0 if any
// instruction cannot encode an immediate offset at all. Also sets
// HasNonSPFrameIndex when a frame index is used in a register class that
// excludes SP. (Signature start and some case labels elided in this view.)
2064 const TargetFrameLowering *TFI,
2065 bool &HasNonSPFrameIndex) {
2066 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2067 const ARMBaseInstrInfo &TII =
2068 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
// Default: AddrMode_i12 — 12-bit unsigned immediate.
2070 unsigned Limit = (1 << 12) - 1;
2071 for (auto &MBB : MF) {
2072 for (auto &MI : MBB) {
2073 if (MI.isDebugInstr())
2074 continue;
2075 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2076 if (!MI.getOperand(i).isFI())
2077 continue;
2078
2079 // When using ADDri to get the address of a stack object, 255 is the
2080 // largest offset guaranteed to fit in the immediate offset.
2081 if (MI.getOpcode() == ARM::ADDri) {
2082 Limit = std::min(Limit, (1U << 8) - 1);
2083 break;
2084 }
2085 // t2ADDri will not require an extra register, it can reuse the
2086 // destination.
2087 if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
2088 break;
2089
2090 const MCInstrDesc &MCID = MI.getDesc();
2091 const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
2092 if (RegClass && !RegClass->contains(ARM::SP))
2093 HasNonSPFrameIndex = true;
2094
2095 // Otherwise check the addressing mode.
// Each mode clamps Limit to its own immediate range (scaled where the
// encoding multiplies the immediate by the access size).
2096 switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
2098 case ARMII::AddrMode2:
2099 // Default 12 bit limit.
2100 break;
2101 case ARMII::AddrMode3:
2103 Limit = std::min(Limit, (1U << 8) - 1);
2104 break;
2106 Limit = std::min(Limit, ((1U << 8) - 1) * 2);
2107 break;
2108 case ARMII::AddrMode5:
2111 Limit = std::min(Limit, ((1U << 8) - 1) * 4);
2112 break;
2114 // i12 supports only positive offset so these will be converted to
2115 // i8 opcodes. See llvm::rewriteT2FrameIndex.
2116 if (TFI->hasFP(MF) && AFI->hasStackFrame())
2117 Limit = std::min(Limit, (1U << 8) - 1);
2118 break;
2119 case ARMII::AddrMode4:
2120 case ARMII::AddrMode6:
2121 // Addressing modes 4 & 6 (load/store) instructions can't encode an
2122 // immediate offset for stack references.
2123 return 0;
2125 Limit = std::min(Limit, ((1U << 7) - 1) * 1);
2126 break;
2128 Limit = std::min(Limit, ((1U << 7) - 1) * 2);
2129 break;
2131 Limit = std::min(Limit, ((1U << 7) - 1) * 4);
2132 break;
2133 default:
2134 llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
2135 }
2136 break; // At most one FI per instruction
2137 }
2138 }
2139 }
2140
2141 return Limit;
2142}
2143
2144// In functions that realign the stack, it can be an advantage to spill the
2145// callee-saved vector registers after realigning the stack. The vst1 and vld1
2146// instructions take alignment hints that can improve performance.
2147static void
// (Signature line elided in this view.) Decides how many callee-saved
// D-registers (counting up from d8, stopping at the first hole) should be
// spilled with aligned vst1/vld1 after stack realignment, records the count
// in ARMFunctionInfo, and reserves R4 as the scratch base register. Bails
// out — leaving the count at 0 — for naked functions, targets without NEON,
// sufficiently aligned stacks, or when realignment is impossible.
2149 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2151 return;
2152
2153 // Naked functions don't spill callee-saved registers.
2154 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
2155 return;
2156
2157 // We are planning to use NEON instructions vst1 / vld1.
2158 if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2159 return;
2160
2161 // Don't bother if the default stack alignment is sufficiently high.
2163 return;
2164
2165 // Aligned spills require stack realignment.
2166 if (!static_cast<const ARMBaseRegisterInfo *>(
2168 return;
2169
2170 // We always spill contiguous d-registers starting from d8. Count how many
2171 // needs spilling. The register allocator will almost always use the
2172 // callee-saved registers in order, but it can happen that there are holes in
2173 // the range. Registers above the hole will be spilled to the standard DPRCS
2174 // area.
2175 unsigned NumSpills = 0;
2176 for (; NumSpills < 8; ++NumSpills)
2177 if (!SavedRegs.test(ARM::D8 + NumSpills))
2178 break;
2179
2180 // Don't do this for just one d-register. It's not worth it.
2181 if (NumSpills < 2)
2182 return;
2183
2184 // Spill the first NumSpills D-registers after realigning the stack.
2185 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2186
2187 // A scratch register is required for the vst1 / vld1 instructions.
2188 SavedRegs.set(ARM::R4);
2189}
2190
// Body of the shrink-wrapping enable hook (signature elided in this view).
// Returns false for the two cases where the prologue must stay at function
// entry; true otherwise.
2191 // For CMSE entry functions, we want to save the FPCXT_NS immediately
2192 // upon function entry (resp. restore it immediately before return)
2194 if (STI.hasV8_1MMainlineOps() &&
2196 return false;
2197
2198 // We are disabling shrinkwrapping for now when PAC is enabled, as
2199 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2200 // generated. A follow-up patch will fix this in a more performant manner.
2202 true /* SpillsLR */))
2203 return false;
2204
2205 return true;
2206}
2207
// Body of requiresAAPCSFrameRecord (signature elided in this view): an AAPCS
// frame record is required when the subtarget asks for a frame chain in leaf
// functions, or asks for a frame chain and this function makes calls.
2209 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2210 return Subtarget.createAAPCSFrameChainLeaf() ||
2211 (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls());
2212}
2213
2214// Thumb1 may require a spill when storing to a frame index through FP, for
2215// cases where FP is a high register (R11). This scans the function for cases
2216// where this may happen.
// Returns true iff this is a Thumb1-only function containing a tSTRspi/tSTRi
// whose frame index resolves to a high base register (other than SP), which
// tSTRi cannot address directly. (Signature start elided in this view.)
2218 const TargetFrameLowering &TFI) {
2219 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2220 if (!AFI->isThumb1OnlyFunction())
2221 return false;
2222
2223 for (const auto &MBB : MF)
2224 for (const auto &MI : MBB)
2225 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi)
2226 for (const auto &Op : MI.operands())
2227 if (Op.isFI()) {
2228 Register Reg;
// Only the base register matters here; the offset is ignored.
2229 TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
2230 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2231 return true;
2232 }
2233 return false;
2234}
2235
// Body of ARMFrameLowering::determineCalleeSaves (first signature line elided
// in this view). Decides the full set of callee-saved registers to spill:
// scratch registers (R4/LR) for realignment, stack probes and Thumb1 quirks;
// FP/LR for the frame record; extra low registers to shuffle high-register
// pushes on Thumb1; padding spills for 8-byte stack alignment; and either an
// extra callee-save spill or an emergency spill slot for the register
// scavenger. NOTE(review): several interior lines are elided from this
// extract; the visible code is kept byte-for-byte.
2237 BitVector &SavedRegs,
2238 RegScavenger *RS) const {
2240 // This tells PEI to spill the FP as if it is any other callee-save register
2241 // to take advantage of the eliminateFrameIndex machinery. This also ensures it
2242 // is spilled in the order specified by getCalleeSavedRegs() to make it easier
2243 // to combine multiple loads / stores.
2244 bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));
2245 bool CS1Spilled = false;
2246 bool LRSpilled = false;
2247 unsigned NumGPRSpills = 0;
2248 unsigned NumFPRSpills = 0;
2249 SmallVector<unsigned, 4> UnspilledCS1GPRs;
2250 SmallVector<unsigned, 4> UnspilledCS2GPRs;
2251 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
2253 const ARMBaseInstrInfo &TII =
2254 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2256 MachineFrameInfo &MFI = MF.getFrameInfo();
2259 (void)TRI; // Silence unused warning in non-assert builds.
2260 Register FramePtr = RegInfo->getFrameRegister(MF);
2261
2262 // Spill R4 if Thumb2 function requires stack realignment - it will be used as
2263 // scratch register. Also spill R4 if Thumb2 function has varsized objects,
2264 // since it's not always possible to restore sp from fp in a single
2265 // instruction.
2266 // FIXME: It will be better just to find spare register here.
2267 if (AFI->isThumb2Function() &&
2268 (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
2269 SavedRegs.set(ARM::R4);
2270
2271 // If a stack probe will be emitted, spill R4 and LR, since they are
2272 // clobbered by the stack probe call.
2273 // This estimate should be a safe, conservative estimate. The actual
2274 // stack probe is enabled based on the size of the local objects;
2275 // this estimate also includes the varargs store size.
2276 if (STI.isTargetWindows() &&
2277 WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
2278 SavedRegs.set(ARM::R4);
2279 SavedRegs.set(ARM::LR);
2280 }
2281
2282 if (AFI->isThumb1OnlyFunction()) {
2283 // Spill LR if Thumb1 function uses variable length argument lists.
2284 if (AFI->getArgRegsSaveSize() > 0)
2285 SavedRegs.set(ARM::LR);
2286
2287 // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
2288 // requires stack alignment. We don't know for sure what the stack size
2289 // will be, but for this, an estimate is good enough. If anything
2290 // changes it, it'll be a spill, which implies we've used all the registers
2291 // and so R4 is already used, so not marking it here will be OK.
2292 // FIXME: It will be better just to find spare register here.
2293 if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
2294 MFI.estimateStackSize(MF) > 508)
2295 SavedRegs.set(ARM::R4);
2296 }
2297
2298 // See if we can spill vector registers to aligned stack.
2299 checkNumAlignedDPRCS2Regs(MF, SavedRegs);
2300
2301 // Spill the BasePtr if it's used.
2302 if (RegInfo->hasBasePointer(MF))
2303 SavedRegs.set(RegInfo->getBaseRegister());
2304
2305 // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
2306 if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
2307 CanEliminateFrame = false;
2308
2309 // Don't spill FP if the frame can be eliminated. This is determined
2310 // by scanning the callee-save registers to see if any is modified.
2311 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2312 for (unsigned i = 0; CSRegs[i]; ++i) {
2313 unsigned Reg = CSRegs[i];
2314 bool Spilled = false;
2315 if (SavedRegs.test(Reg)) {
2316 Spilled = true;
2317 CanEliminateFrame = false;
2318 }
2319
// FP registers: count spill size in 4-byte units (SPR=1, DPR=2, QPR=4).
2320 if (!ARM::GPRRegClass.contains(Reg)) {
2321 if (Spilled) {
2322 if (ARM::SPRRegClass.contains(Reg))
2323 NumFPRSpills++;
2324 else if (ARM::DPRRegClass.contains(Reg))
2325 NumFPRSpills += 2;
2326 else if (ARM::QPRRegClass.contains(Reg))
2327 NumFPRSpills += 4;
2328 }
2329 continue;
2330 }
2331
2332 if (Spilled) {
2333 NumGPRSpills++;
2334
2335 if (!STI.splitFramePushPop(MF)) {
2336 if (Reg == ARM::LR)
2337 LRSpilled = true;
2338 CS1Spilled = true;
2339 continue;
2340 }
2341
2342 // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
2343 switch (Reg) {
2344 case ARM::LR:
2345 LRSpilled = true;
2346 [[fallthrough]];
2347 case ARM::R0: case ARM::R1:
2348 case ARM::R2: case ARM::R3:
2349 case ARM::R4: case ARM::R5:
2350 case ARM::R6: case ARM::R7:
2351 CS1Spilled = true;
2352 break;
2353 default:
2354 break;
2355 }
2356 } else {
2357 if (!STI.splitFramePushPop(MF)) {
2358 UnspilledCS1GPRs.push_back(Reg);
2359 continue;
2360 }
2361
// Partition unspilled registers into area 1 (low regs + LR) and area 2.
2362 switch (Reg) {
2363 case ARM::R0: case ARM::R1:
2364 case ARM::R2: case ARM::R3:
2365 case ARM::R4: case ARM::R5:
2366 case ARM::R6: case ARM::R7:
2367 case ARM::LR:
2368 UnspilledCS1GPRs.push_back(Reg);
2369 break;
2370 default:
2371 UnspilledCS2GPRs.push_back(Reg);
2372 break;
2373 }
2374 }
2375 }
2376
2377 bool ForceLRSpill = false;
2378 if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
2379 unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
2380 // Force LR to be spilled if the Thumb function size is > 2048. This enables
2381 // use of BL to implement far jump.
2382 if (FnSize >= (1 << 11)) {
2383 CanEliminateFrame = false;
2384 ForceLRSpill = true;
2385 }
2386 }
2387
2388 // If any of the stack slot references may be out of range of an immediate
2389 // offset, make sure a register (or a spill slot) is available for the
2390 // register scavenger. Note that if we're indexing off the frame pointer, the
2391 // effective stack size is 4 bytes larger since the FP points to the stack
2392 // slot of the previous FP. Also, if we have variable sized objects in the
2393 // function, stack slot references will often be negative, and some of
2394 // our instructions are positive-offset only, so conservatively consider
2395 // that case to want a spill slot (or register) as well. Similarly, if
2396 // the function adjusts the stack pointer during execution and the
2397 // adjustments aren't already part of our stack size estimate, our offset
2398 // calculations may be off, so be conservative.
2399 // FIXME: We could add logic to be more precise about negative offsets
2400 // and which instructions will need a scratch register for them. Is it
2401 // worth the effort and added fragility?
2402 unsigned EstimatedStackSize =
2403 MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
2404
2405 // Determine biggest (positive) SP offset in MachineFrameInfo.
2406 int MaxFixedOffset = 0;
2407 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
2408 int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
2409 MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
2410 }
2411
2412 bool HasFP = hasFP(MF);
2413 if (HasFP) {
2414 if (AFI->hasStackFrame())
2415 EstimatedStackSize += 4;
2416 } else {
2417 // If FP is not used, SP will be used to access arguments, so count the
2418 // size of arguments into the estimation.
2419 EstimatedStackSize += MaxFixedOffset;
2420 }
2421 EstimatedStackSize += 16; // For possible paddings.
2422
2423 unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
2424 bool HasNonSPFrameIndex = false;
2425 if (AFI->isThumb1OnlyFunction()) {
2426 // For Thumb1, don't bother to iterate over the function. The only
2427 // instruction that requires an emergency spill slot is a store to a
2428 // frame index.
2429 //
2430 // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
2431 // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
2432 // a 5-bit unsigned immediate.
2433 //
2434 // We could try to check if the function actually contains a tSTRspi
2435 // that might need the spill slot, but it's not really important.
2436 // Functions with VLAs or extremely large call frames are rare, and
2437 // if a function is allocating more than 1KB of stack, an extra 4-byte
2438 // slot probably isn't relevant.
2439 //
2440 // A special case is the scenario where r11 is used as FP, where accesses
2441 // to a frame index will require its value to be moved into a low reg.
2442 // This is handled later on, once we are able to determine if we have any
2443 // fp-relative accesses.
2444 if (RegInfo->hasBasePointer(MF))
2445 EstimatedRSStackSizeLimit = (1U << 5) * 4;
2446 else
2447 EstimatedRSStackSizeLimit = (1U << 8) * 4;
2448 EstimatedRSFixedSizeLimit = (1U << 5) * 4;
2449 } else {
2450 EstimatedRSStackSizeLimit =
2451 estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
2452 EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
2453 }
2454 // Final estimate of whether sp or bp-relative accesses might require
2455 // scavenging.
2456 bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
2457
2458 // If the stack pointer moves and we don't have a base pointer, the
2459 // estimate logic doesn't work. The actual offsets might be larger when
2460 // we're constructing a call frame, or we might need to use negative
2461 // offsets from fp.
2462 bool HasMovingSP = MFI.hasVarSizedObjects() ||
2463 (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
2464 bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
2465
2466 // If we have a frame pointer, we assume arguments will be accessed
2467 // relative to the frame pointer. Check whether fp-relative accesses to
2468 // arguments require scavenging.
2469 //
2470 // We could do slightly better on Thumb1; in some cases, an sp-relative
2471 // offset would be legal even though an fp-relative offset is not.
2472 int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
2473 bool HasLargeArgumentList =
2474 HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
2475
2476 bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
2477 HasLargeArgumentList || HasNonSPFrameIndex;
2478 LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
2479 << "; EstimatedStack: " << EstimatedStackSize
2480 << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
2481 << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
2482 if (BigFrameOffsets ||
2483 !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
2484 AFI->setHasStackFrame(true);
2485
2486 if (HasFP) {
2487 SavedRegs.set(FramePtr);
2488 // If the frame pointer is required by the ABI, also spill LR so that we
2489 // emit a complete frame record.
2490 if ((requiresAAPCSFrameRecord(MF) ||
2492 !LRSpilled) {
2493 SavedRegs.set(ARM::LR);
2494 LRSpilled = true;
2495 NumGPRSpills++;
2496 auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
2497 if (LRPos != UnspilledCS1GPRs.end())
2498 UnspilledCS1GPRs.erase(LRPos);
2499 }
2500 auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
2501 if (FPPos != UnspilledCS1GPRs.end())
2502 UnspilledCS1GPRs.erase(FPPos);
2503 NumGPRSpills++;
2504 if (FramePtr == ARM::R7)
2505 CS1Spilled = true;
2506 }
2507
2508 // This is true when we inserted a spill for a callee-save GPR which is
2509 // not otherwise used by the function. This guarantees it is possible
2510 // to scavenge a register to hold the address of a stack slot. On Thumb1,
2511 // the register must be a valid operand to tSTRi, i.e. r4-r7. For other
2512 // subtargets, this is any GPR, i.e. r4-r11 or lr.
2513 //
2514 // If we don't insert a spill, we instead allocate an emergency spill
2515 // slot, which can be used by scavenging to spill an arbitrary register.
2516 //
2517 // We currently don't try to figure out whether any specific instruction
2518 // requires scavenging an additional register.
2519 bool ExtraCSSpill = false;
2520
2521 if (AFI->isThumb1OnlyFunction()) {
2522 // For Thumb1-only targets, we need some low registers when we save and
2523 // restore the high registers (which aren't allocatable, but could be
2524 // used by inline assembly) because the push/pop instructions can not
2525 // access high registers. If necessary, we might need to push more low
2526 // registers to ensure that there is at least one free that can be used
2527 // for the saving & restoring, and preferably we should ensure that as
2528 // many as are needed are available so that fewer push/pop instructions
2529 // are required.
2530
2531 // Low registers which are not currently pushed, but could be (r4-r7).
2532 SmallVector<unsigned, 4> AvailableRegs;
2533
2534 // Unused argument registers (r0-r3) can be clobbered in the prologue for
2535 // free.
2536 int EntryRegDeficit = 0;
2537 for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
2538 if (!MF.getRegInfo().isLiveIn(Reg)) {
2539 --EntryRegDeficit;
2541 << printReg(Reg, TRI)
2542 << " is unused argument register, EntryRegDeficit = "
2543 << EntryRegDeficit << "\n");
2544 }
2545 }
2546
2547 // Unused return registers can be clobbered in the epilogue for free.
2548 int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
2550 << " return regs used, ExitRegDeficit = "
2551 << ExitRegDeficit << "\n");
2552
2553 int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
2554 LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
2555
2556 // r4-r6 can be used in the prologue if they are pushed by the first push
2557 // instruction.
2558 for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
2559 if (SavedRegs.test(Reg)) {
2560 --RegDeficit;
2561 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2562 << " is saved low register, RegDeficit = "
2563 << RegDeficit << "\n");
2564 } else {
2565 AvailableRegs.push_back(Reg);
2566 LLVM_DEBUG(
2567 dbgs()
2568 << printReg(Reg, TRI)
2569 << " is non-saved low register, adding to AvailableRegs\n");
2570 }
2571 }
2572
2573 // r7 can be used if it is not being used as the frame pointer.
2574 if (!HasFP || FramePtr != ARM::R7) {
2575 if (SavedRegs.test(ARM::R7)) {
2576 --RegDeficit;
2577 LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
2578 << RegDeficit << "\n");
2579 } else {
2580 AvailableRegs.push_back(ARM::R7);
2581 LLVM_DEBUG(
2582 dbgs()
2583 << "%r7 is non-saved low register, adding to AvailableRegs\n");
2584 }
2585 }
2586
2587 // Each of r8-r11 needs to be copied to a low register, then pushed.
2588 for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
2589 if (SavedRegs.test(Reg)) {
2590 ++RegDeficit;
2591 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2592 << " is saved high register, RegDeficit = "
2593 << RegDeficit << "\n");
2594 }
2595 }
2596
2597 // LR can only be used by PUSH, not POP, and can't be used at all if the
2598 // llvm.returnaddress intrinsic is used. This is only worth doing if we
2599 // are more limited at function entry than exit.
2600 if ((EntryRegDeficit > ExitRegDeficit) &&
2601 !(MF.getRegInfo().isLiveIn(ARM::LR) &&
2603 if (SavedRegs.test(ARM::LR)) {
2604 --RegDeficit;
2605 LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
2606 << RegDeficit << "\n");
2607 } else {
2608 AvailableRegs.push_back(ARM::LR);
2609 LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
2610 }
2611 }
2612
2613 // If there are more high registers that need pushing than low registers
2614 // available, push some more low registers so that we can use fewer push
2615 // instructions. This might not reduce RegDeficit all the way to zero,
2616 // because we can only guarantee that r4-r6 are available, but r8-r11 may
2617 // need saving.
2618 LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2619 for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2620 unsigned Reg = AvailableRegs.pop_back_val();
2621 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2622 << " to make up reg deficit\n");
2623 SavedRegs.set(Reg);
2624 NumGPRSpills++;
2625 CS1Spilled = true;
2626 assert(!MRI.isReserved(Reg) && "Should not be reserved");
2627 if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
2628 ExtraCSSpill = true;
2629 UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
2630 if (Reg == ARM::LR)
2631 LRSpilled = true;
2632 }
2633 LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2634 << "\n");
2635 }
2636
2637 // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2638 // restore LR in that case.
2639 bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2640
2641 // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
2642 // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
2643 if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2644 SavedRegs.set(ARM::LR);
2645 NumGPRSpills++;
2647 LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2648 if (LRPos != UnspilledCS1GPRs.end())
2649 UnspilledCS1GPRs.erase(LRPos);
2650
2651 ForceLRSpill = false;
2652 if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2653 !AFI->isThumb1OnlyFunction())
2654 ExtraCSSpill = true;
2655 }
2656
2657 // If stack and double are 8-byte aligned and we are spilling an odd number
2658 // of GPRs, spill one extra callee save GPR so we won't have to pad between
2659 // the integer and double callee save areas.
2660 LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2661 const Align TargetAlign = getStackAlign();
2662 if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
2663 if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
2664 for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
2665 unsigned Reg = UnspilledCS1GPRs[i];
2666 // Don't spill high register if the function is thumb. In the case of
2667 // Windows on ARM, accept R11 (frame pointer)
2668 if (!AFI->isThumbFunction() ||
2669 (STI.isTargetWindows() && Reg == ARM::R11) ||
2670 isARMLowRegister(Reg) ||
2671 (Reg == ARM::LR && !ExpensiveLRRestore)) {
2672 SavedRegs.set(Reg);
2673 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2674 << " to make up alignment\n");
2675 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
2676 !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
2677 ExtraCSSpill = true;
2678 break;
2679 }
2680 }
2681 } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
2682 unsigned Reg = UnspilledCS2GPRs.front();
2683 SavedRegs.set(Reg);
2684 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2685 << " to make up alignment\n");
2686 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
2687 ExtraCSSpill = true;
2688 }
2689 }
2690
2691 // Estimate if we might need to scavenge a register at some point in order
2692 // to materialize a stack offset. If so, either spill one additional
2693 // callee-saved register or reserve a special spill slot to facilitate
2694 // register scavenging. Thumb1 needs a spill slot for stack pointer
2695 // adjustments and for frame index accesses when FP is high register,
2696 // even when the frame itself is small.
2697 if (!ExtraCSSpill &&
2698 (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this))) {
2699 // If any non-reserved CS register isn't spilled, just spill one or two
2700 // extra. That should take care of it!
2701 unsigned NumExtras = TargetAlign.value() / 4;
2703 while (NumExtras && !UnspilledCS1GPRs.empty()) {
2704 unsigned Reg = UnspilledCS1GPRs.pop_back_val();
2705 if (!MRI.isReserved(Reg) &&
2706 (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
2707 Extras.push_back(Reg);
2708 NumExtras--;
2709 }
2710 }
2711 // For non-Thumb1 functions, also check for hi-reg CS registers
2712 if (!AFI->isThumb1OnlyFunction()) {
2713 while (NumExtras && !UnspilledCS2GPRs.empty()) {
2714 unsigned Reg = UnspilledCS2GPRs.pop_back_val();
2715 if (!MRI.isReserved(Reg)) {
2716 Extras.push_back(Reg);
2717 NumExtras--;
2718 }
2719 }
2720 }
// Only commit the extra spills if we found enough registers to keep the
// callee-save area aligned; otherwise fall back to the emergency slot.
2721 if (NumExtras == 0) {
2722 for (unsigned Reg : Extras) {
2723 SavedRegs.set(Reg);
2724 if (!MRI.isPhysRegUsed(Reg))
2725 ExtraCSSpill = true;
2726 }
2727 }
2728 if (!ExtraCSSpill && RS) {
2729 // Reserve a slot closest to SP or frame pointer.
2730 LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
2731 const TargetRegisterClass &RC = ARM::GPRRegClass;
2732 unsigned Size = TRI->getSpillSize(RC);
2733 Align Alignment = TRI->getSpillAlign(RC);
2735 MFI.CreateStackObject(Size, Alignment, false));
2736 }
2737 }
2738 }
2739
2740 if (ForceLRSpill)
2741 SavedRegs.set(ARM::LR);
2742 AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
2743}
2744
// Body of ARMFrameLowering::getCalleeSaves (first signature line elided in
// this view): in addition to the base-class computation, marks R0 as
// callee-saved when the function preserves it (the "returned" attribute).
2746 BitVector &SavedRegs) const {
2748
2749 // If we have the "returned" parameter attribute which guarantees that we
2750 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
2751 // record that fact for IPRA.
2752 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2753 if (AFI->getPreservesR0())
2754 SavedRegs.set(ARM::R0);
2755}
2756
// Body of ARMFrameLowering::assignCalleeSavedSpillSlots (signature start
// elided in this view). Adds two pseudo callee-saves when needed — FPCXTNS
// for CMSE entry functions and R12 (the PAC value) for return-address
// signing — then returns false so PEI still assigns the actual slots.
2759 std::vector<CalleeSavedInfo> &CSI) const {
2760 // For CMSE entry functions, handle floating-point context as if it was a
2761 // callee-saved register.
2762 if (STI.hasV8_1MMainlineOps() &&
2764 CSI.emplace_back(ARM::FPCXTNS);
// FPCXTNS is saved on entry but never restored by the epilogue pop code.
2765 CSI.back().setRestored(false);
2766 }
2767
2768 // For functions which sign their return address, upon function entry, the
2769 // return address PAC is computed in R12. Treat R12 as a callee-saved register
2770 // in this case.
2771 const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
2772 if (AFI.shouldSignReturnAddress()) {
2773 // The order of register must match the order we push them, because the
2774 // PEI assigns frame indices in that order. When compiling for return
2775 // address sign and authentication, we use split push, therefore the orders
2776 // we want are:
2777 // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
// Insert R12 just before the first area-2/D-register entry so its frame
// index lands between the two push groups.
2778 CSI.insert(find_if(CSI,
2779 [=](const auto &CS) {
2780 Register Reg = CS.getReg();
2781 return Reg == ARM::R10 || Reg == ARM::R11 ||
2782 Reg == ARM::R8 || Reg == ARM::R9 ||
2783 ARM::DPRRegClass.contains(Reg);
2784 }),
2785 CalleeSavedInfo(ARM::R12));
2786 }
2787
2788 return false;
2789}
2790
// Body of the fixed-spill-slot query (signature elided in this view): the
// only fixed slot is FPCXTNS at offset -4, matching the VSTR_FPCXTNS_pre
// save emitted in the prologue.
2793 static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
2794 NumEntries = std::size(FixedSpillOffsets);
2795 return FixedSpillOffsets;
2796}
2797
// Replace ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudos with real SP adjustments
// (or nothing when a reserved call frame makes them redundant). Returns the
// iterator following the erased pseudo. Not valid for Thumb1 (asserted
// below). (Parameter lines elided in this view.)
2798MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2801 const ARMBaseInstrInfo &TII =
2802 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2804 bool isARM = !AFI->isThumbFunction();
2805 DebugLoc dl = I->getDebugLoc();
2806 unsigned Opc = I->getOpcode();
2807 bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
// Operand 1 of the destroy pseudo holds the callee-popped byte count.
2808 unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2809
2810 assert(!AFI->isThumb1OnlyFunction() &&
2811 "This eliminateCallFramePseudoInstr does not support Thumb1!");
2812
// Preserve the pseudo's predicate on the emitted SP-update instructions.
2813 int PIdx = I->findFirstPredOperandIdx();
2814 ARMCC::CondCodes Pred = (PIdx == -1)
2815 ? ARMCC::AL
2816 : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
2817 unsigned PredReg = TII.getFramePred(*I);
2818
2819 if (!hasReservedCallFrame(MF)) {
2820 // Bail early if the callee is expected to do the adjustment.
2821 if (IsDestroy && CalleePopAmount != -1U)
2822 return MBB.erase(I);
2823
2824 // If we have alloca, convert as follows:
2825 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
2826 // ADJCALLSTACKUP -> add, sp, sp, amount
2827 unsigned Amount = TII.getFrameSize(*I);
2828 if (Amount != 0) {
2829 // We need to keep the stack aligned properly. To do this, we round the
2830 // amount of space needed for the outgoing arguments up to the next
2831 // alignment boundary.
2832 Amount = alignSPAdjust(Amount);
2833
2834 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2835 emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2836 Pred, PredReg);
2837 } else {
2838 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2839 emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2840 Pred, PredReg);
2841 }
2842 }
2843 } else if (CalleePopAmount != -1U) {
2844 // If the calling convention demands that the callee pops arguments from the
2845 // stack, we want to add it back if we have a reserved call frame.
2846 emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
2847 MachineInstr::NoFlags, Pred, PredReg);
2848 }
2849 return MBB.erase(I);
2850}
2851
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
///
/// \param Value requested byte count / constant.
/// \return the smallest ARM-encodable immediate >= \p Value (computed modulo
///         2^32: values near the top of the range can wrap on the final
///         left-shift).
///
/// NOTE(review): the extraction lost the original signature line (2856); it is
/// reconstructed here from the file's static-function index.
static uint32_t alignToARMConstant(uint32_t Value) {
  unsigned Shifted = 0;

  if (Value == 0)
    return 0;

  // Normalize: shift left (by even amounts) until the top two bits are
  // occupied, remembering how far we shifted so it can be undone below.
  while (!(Value & 0xC0000000)) {
    Value = Value << 2;
    Shifted += 2;
  }

  // Keep the leading 8 bits; if any lower bit was set, round up by one so the
  // result is >= the input.
  bool Carry = (Value & 0x00FFFFFF);
  Value = ((Value & 0xFF000000) >> 24) + Carry;

  // With Carry <= 1, Value is at most 0x100, and 0x100 & 0x1FC == 0x100, so
  // this mask never changes Value; kept from the original algorithm.
  if (Value & 0x00000100)
    Value = Value & 0x000001FC;

  // Undo the normalization shift, re-aligning the 8(or 9)-bit result to the
  // original magnitude.
  if (Shifted > 24)
    Value = Value >> (Shifted - 24);
  else
    Value = Value << (24 - Shifted);

  return Value;
}
2880
2881// The stack limit in the TCB is set to this many bytes above the actual
2882// stack limit.
2884
2885// Adjust the function prologue to enable split stacks. This currently only
2886// supports android and linux.
2887//
2888// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
2889// must be well defined in order to allow for consistent implementations of the
2890// __morestack helper function. The ABI is also not a normal ABI in that it
2891// doesn't follow the normal calling conventions because this allows the
2892// prologue of each function to be optimized further.
2893//
2894// Currently, the ABI looks like (when calling __morestack)
2895//
2896// * r4 holds the minimum stack size requested for this function call
2897// * r5 holds the stack size of the arguments to the function
2898// * the beginning of the function is 3 instructions after the call to
2899// __morestack
2900//
2901// Implementations of __morestack should use r4 to allocate a new stack, r5 to
2902// place the arguments on to the new stack, and the 3-instruction knowledge to
2903// jump directly to the body of the function when working on the new stack.
2904//
2905// An old (and possibly no longer compatible) implementation of __morestack for
2906// ARM can be found at [1].
2907//
2908// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
// NOTE(review): doxygen-extracted listing of
// ARMFrameLowering::adjustForSegmentedStacks. The extraction dropped every
// hyperlinked source line, including the signature opener (2909,
// "void ARMFrameLowering::adjustForSegmentedStacks("), the creation of
// AllocMBB/GetMBB/McrMBB (2946-2948), the WalkList declaration (2952),
// MBB->sortUniqueLiveIns() (2987), and many builder-chain continuation lines
// (predOps(...)/addImm(...)/CFI openers). Restore them from the upstream file
// before compiling; gap markers below flag the largest omissions.
2910    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2911  unsigned Opcode;
2912  unsigned CFIIndex;
2913  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2914  bool Thumb = ST->isThumb();
2915  bool Thumb2 = ST->isThumb2();
2916
2917  // Sadly, this currently doesn't support varargs, platforms other than
2918  // android/linux. Note that thumb1/thumb2 are supported for android/linux.
2919  if (MF.getFunction().isVarArg())
2920    report_fatal_error("Segmented stacks do not support vararg functions.");
2921  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2922    report_fatal_error("Segmented stacks not supported on this platform.");
2923
2924  MachineFrameInfo &MFI = MF.getFrameInfo();
2925  MachineModuleInfo &MMI = MF.getMMI();
2926  MCContext &Context = MMI.getContext();
2927  const MCRegisterInfo *MRI = Context.getRegisterInfo();
2928  const ARMBaseInstrInfo &TII =
2929      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  // NOTE(review): line 2930 (the ARMFunctionInfo *ARMFI declaration) is
  // missing from this extraction; ARMFI is used below.
2931  DebugLoc DL;
2932
2933  if (!MFI.needsSplitStackProlog())
2934    return;
2935
2936  uint64_t StackSize = MFI.getStackSize();
2937
2938  // Use R4 and R5 as scratch registers.
2939  // We save R4 and R5 before use and restore them before leaving the function.
2940  unsigned ScratchReg0 = ARM::R4;
2941  unsigned ScratchReg1 = ARM::R5;
2942  uint64_t AlignedStackSize;
2943
2944  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
2945  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
  // NOTE(review): lines 2946-2948 (creation of AllocMBB, GetMBB and McrMBB)
  // are missing from this extraction; all three blocks are used below.
2949
2950  // Grab everything that reaches PrologueMBB to update their liveness as well.
2951  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
  // NOTE(review): line 2952 (the WalkList SmallVector declaration) is missing
  // from this extraction.
2953  WalkList.push_back(&PrologueMBB);
2954
  // Reverse-CFG walk collecting every block that can reach the prologue.
2955  do {
2956    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
2957    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
2958      if (BeforePrologueRegion.insert(PredBB).second)
2959        WalkList.push_back(PredBB);
2960    }
2961  } while (!WalkList.empty());
2962
2963  // The order in that list is important.
2964  // The blocks will all be inserted before PrologueMBB using that order.
2965  // Therefore the block that should appear first in the CFG should appear
2966  // first in the list.
2967  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
2968                                      PostStackMBB};
2969
2970  for (MachineBasicBlock *B : AddedBlocks)
2971    BeforePrologueRegion.insert(B);
2972
  // Propagate the prologue's live-ins to every block in the region so the new
  // code sees consistent liveness.
2973  for (const auto &LI : PrologueMBB.liveins()) {
2974    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
2975      PredBB->addLiveIn(LI);
2976  }
2977
2978  // Remove the newly added blocks from the list, since we know
2979  // we do not have to do the following updates for them.
2980  for (MachineBasicBlock *B : AddedBlocks) {
2981    BeforePrologueRegion.erase(B);
2982    MF.insert(PrologueMBB.getIterator(), B);
2983  }
2984
2985  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
2986    // Make sure the LiveIns are still sorted and unique.
    // NOTE(review): line 2987 (MBB->sortUniqueLiveIns()) is missing from this
    // extraction.
2988    // Replace the edges to PrologueMBB by edges to the sequences
2989    // we are about to add, but only update for immediate predecessors.
2990    if (MBB->isSuccessor(&PrologueMBB))
2991      MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
2992  }
2993
2994  // The required stack size that is aligned to ARM constant criterion.
2995  AlignedStackSize = alignToARMConstant(StackSize);
2996
2997  // When the frame size is less than 256 we just compare the stack
2998  // boundary directly to the value of the stack pointer, per gcc.
2999  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
3000
3001  // We will use two of the callee save registers as scratch registers so we
3002  // need to save those registers onto the stack.
3003  // We will use SR0 to hold stack limit and SR1 to hold the stack size
3004  // requested and arguments for __morestack().
3005  // SR0: Scratch Register #0
3006  // SR1: Scratch Register #1
3007  // push {SR0, SR1}
3008  if (Thumb) {
3009    BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
3011        .addReg(ScratchReg0)
3012        .addReg(ScratchReg1);
3013  } else {
3014    BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
3015        .addReg(ARM::SP, RegState::Define)
3016        .addReg(ARM::SP)
3018        .addReg(ScratchReg0)
3019        .addReg(ScratchReg1);
3020  }
3021
3022  // Emit the relevant DWARF information about the change in stack pointer as
3023  // well as where to find both r4 and r5 (the callee-save registers)
3024  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3025    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
3026    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3027        .addCFIIndex(CFIIndex);
    // NOTE(review): the "CFIIndex = MF.addFrameInst(MCCFIInstruction::
    // createOffset(" opener lines (3028 and 3032) are missing from this
    // extraction; only their continuation lines survive below.
3029        nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
3030    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3031        .addCFIIndex(CFIIndex);
3033        nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
3034    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3035        .addCFIIndex(CFIIndex);
3036  }
3037
3038  // mov SR1, sp
3039  if (Thumb) {
3040    BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
3041        .addReg(ARM::SP)
3043  } else if (CompareStackPointer) {
3044    BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
3045        .addReg(ARM::SP)
3047        .add(condCodeOp());
3048  }
3049
3050  // sub SR1, sp, #StackSize
3051  if (!CompareStackPointer && Thumb) {
3052    if (AlignedStackSize < 256) {
3053      BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
3054          .add(condCodeOp())
3055          .addReg(ScratchReg1)
3056          .addImm(AlignedStackSize)
3058    } else {
3059      if (Thumb2) {
3060        BuildMI(McrMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0)
3061            .addImm(AlignedStackSize);
3062      } else {
        // Thumb1 has no 32-bit mov-immediate; materialize via constant pool.
3063        auto MBBI = McrMBB->end();
3064        auto RegInfo = STI.getRegisterInfo();
3065        RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3066                                   AlignedStackSize);
3067      }
3068      BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
3069          .add(condCodeOp())
3070          .addReg(ScratchReg1)
3071          .addReg(ScratchReg0)
3073    }
3074  } else if (!CompareStackPointer) {
3075    if (AlignedStackSize < 256) {
3076      BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
3077          .addReg(ARM::SP)
3078          .addImm(AlignedStackSize)
3080          .add(condCodeOp());
3081    } else {
3082      auto MBBI = McrMBB->end();
3083      auto RegInfo = STI.getRegisterInfo();
3084      RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3085                                 AlignedStackSize);
3086      BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
3087          .addReg(ARM::SP)
3088          .addReg(ScratchReg0)
3090          .add(condCodeOp());
3091    }
3092  }
3093
  // Load the stack limit into SR0. Thumb1 goes through a __STACK_LIMIT
  // constant-pool symbol; everything else reads the TLS base via MRC.
3094  if (Thumb && ST->isThumb1Only()) {
3095    unsigned PCLabelId = ARMFI->createPICLabelUId();
3097        MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
3099    unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));
3100
3101    // ldr SR0, [pc, offset(STACK_LIMIT)]
3102    BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
3105
3106    // ldr SR0, [SR0]
3107    BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
3108        .addReg(ScratchReg0)
3109        .addImm(0)
3111  } else {
3112    // Get TLS base address from the coprocessor
3113    // mrc p15, #0, SR0, c13, c0, #3
3114    BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
3115            ScratchReg0)
3116        .addImm(15)
3117        .addImm(0)
3118        .addImm(13)
3119        .addImm(0)
3120        .addImm(3)
3122
3123    // Use the last tls slot on android and a private field of the TCP on linux.
3124    assert(ST->isTargetAndroid() || ST->isTargetLinux());
3125    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
3126
3127    // Get the stack limit from the right offset
3128    // ldr SR0, [sr0, #4 * TlsOffset]
3129    BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
3130            ScratchReg0)
3131        .addReg(ScratchReg0)
3132        .addImm(4 * TlsOffset)
3134  }
3135
3136  // Compare stack limit with stack size requested.
3137  // cmp SR0, SR1
3138  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
3139  BuildMI(GetMBB, DL, TII.get(Opcode))
3140      .addReg(ScratchReg0)
3141      .addReg(ScratchReg1)
3143
3144  // This jump is taken if StackLimit < SP - stack required.
3145  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
3146  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
3148      .addReg(ARM::CPSR);
3149
3150
3151  // Calling __morestack(StackSize, Size of stack arguments).
3152  // __morestack knows that the stack size requested is in SR0(r4)
3153  // and amount size of stack arguments is in SR1(r5).
3154
3155  // Pass first argument for the __morestack by Scratch Register #0.
3156  // The amount size of stack required
3157  if (Thumb) {
3158    if (AlignedStackSize < 256) {
3159      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
3160          .add(condCodeOp())
3161          .addImm(AlignedStackSize)
3163    } else {
3164      if (Thumb2) {
3165        BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0)
3166            .addImm(AlignedStackSize);
3167      } else {
3168        auto MBBI = AllocMBB->end();
3169        auto RegInfo = STI.getRegisterInfo();
3170        RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3171                                   AlignedStackSize);
3172      }
3173    }
3174  } else {
3175    if (AlignedStackSize < 256) {
3176      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
3177          .addImm(AlignedStackSize)
3179          .add(condCodeOp());
3180    } else {
3181      auto MBBI = AllocMBB->end();
3182      auto RegInfo = STI.getRegisterInfo();
3183      RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3184                                 AlignedStackSize);
3185    }
3186  }
3187
3188  // Pass second argument for the __morestack by Scratch Register #1.
3189  // The amount size of stack consumed to save function arguments.
3190  if (Thumb) {
3191    if (ARMFI->getArgumentStackSize() < 256) {
3192      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
3193          .add(condCodeOp())
3196    } else {
3197      if (Thumb2) {
3198        BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg1)
3200      } else {
3201        auto MBBI = AllocMBB->end();
3202        auto RegInfo = STI.getRegisterInfo();
3203        RegInfo->emitLoadConstPool(
3204            *AllocMBB, MBBI, DL, ScratchReg1, 0,
3206      }
3207    }
3208  } else {
3209    if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
3210      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
3213          .add(condCodeOp());
3214    } else {
3215      auto MBBI = AllocMBB->end();
3216      auto RegInfo = STI.getRegisterInfo();
3217      RegInfo->emitLoadConstPool(
3218          *AllocMBB, MBBI, DL, ScratchReg1, 0,
3220    }
3221  }
3222
3223  // push {lr} - Save return address of this function.
3224  if (Thumb) {
3225    BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
3227        .addReg(ARM::LR);
3228  } else {
3229    BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
3230        .addReg(ARM::SP, RegState::Define)
3231        .addReg(ARM::SP)
3233        .addReg(ARM::LR);
3234  }
3235
3236  // Emit the DWARF info about the change in stack as well as where to find the
3237  // previous link register
3238  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3239    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
3240    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3241        .addCFIIndex(CFIIndex);
3243        nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
3244    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3245        .addCFIIndex(CFIIndex);
3246  }
3247
3248  // Call __morestack().
3249  if (Thumb) {
3250    BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
3252        .addExternalSymbol("__morestack");
3253  } else {
3254    BuildMI(AllocMBB, DL, TII.get(ARM::BL))
3255        .addExternalSymbol("__morestack");
3256  }
3257
3258  // pop {lr} - Restore return address of this original function.
3259  if (Thumb) {
3260    if (ST->isThumb1Only()) {
      // Thumb1 tPOP cannot target LR directly; pop into SR0 then move.
3261      BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3263          .addReg(ScratchReg0);
3264      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
3265          .addReg(ScratchReg0)
3267    } else {
3268      BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
3269          .addReg(ARM::LR, RegState::Define)
3270          .addReg(ARM::SP, RegState::Define)
3271          .addReg(ARM::SP)
3272          .addImm(4)
3274    }
3275  } else {
3276    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3277        .addReg(ARM::SP, RegState::Define)
3278        .addReg(ARM::SP)
3280        .addReg(ARM::LR);
3281  }
3282
3283  // Restore SR0 and SR1 in case __morestack() was called.
3284  // __morestack() will skip PostStackMBB block so we need to restore
3285  // scratch registers from here.
3286  // pop {SR0, SR1}
3287  if (Thumb) {
3288    BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3290        .addReg(ScratchReg0)
3291        .addReg(ScratchReg1);
3292  } else {
3293    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3294        .addReg(ARM::SP, RegState::Define)
3295        .addReg(ARM::SP)
3297        .addReg(ScratchReg0)
3298        .addReg(ScratchReg1);
3299  }
3300
3301  // Update the CFA offset now that we've popped
3302  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3303    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
3304    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3305        .addCFIIndex(CFIIndex);
3306  }
3307
3308  // Return from this function.
3309  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
3310
3311  // Restore SR0 and SR1 in case __morestack() was not called.
3312  // pop {SR0, SR1}
3313  if (Thumb) {
3314    BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
3316        .addReg(ScratchReg0)
3317        .addReg(ScratchReg1);
3318  } else {
3319    BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
3320        .addReg(ARM::SP, RegState::Define)
3321        .addReg(ARM::SP)
3323        .addReg(ScratchReg0)
3324        .addReg(ScratchReg1);
3325  }
3326
3327  // Update the CFA offset now that we've popped
3328  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3329    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
3330    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3331        .addCFIIndex(CFIIndex);
3332
3333    // Tell debuggers that r4 and r5 are now the same as they were in the
3334    // previous function, that they're the "Same Value".
    // NOTE(review): the "CFIIndex = MF.addFrameInst(MCCFIInstruction::
    // createSameValue(" opener lines (3335 and 3339) are missing from this
    // extraction.
3336        nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
3337    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3338        .addCFIIndex(CFIIndex);
3340        nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
3341    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3342        .addCFIIndex(CFIIndex);
3343  }
3344
3345  // Organizing MBB lists
3346  PostStackMBB->addSuccessor(&PrologueMBB);
3347
3348  AllocMBB->addSuccessor(PostStackMBB);
3349
3350  GetMBB->addSuccessor(PostStackMBB);
3351  GetMBB->addSuccessor(AllocMBB);
3352
3353  McrMBB->addSuccessor(GetMBB);
3354
3355  PrevStackMBB->addSuccessor(McrMBB);
3356
3357#ifdef EXPENSIVE_CHECKS
3358  MF.verify();
3359#endif
3360}
unsigned const MachineRegisterInfo * MRI
static int64_t getArgumentStackToRestore(MachineFunction &MF, MachineBasicBlock &MBB)
Returns how much of the incoming argument stack area (in bytes) we should clean up in an epilogue.
static bool needsWinCFI(const MachineFunction &MF)
static unsigned estimateRSStackSizeLimit(MachineFunction &MF)
Look at each instruction that references stack frames and return the stack size limit beyond which so...
unsigned RegSize
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static bool isThumb(const MCSubtargetInfo &STI)
static MachineBasicBlock::iterator skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs)
Skip past the code inserted by emitAlignedDPRCS2Spills, and return an iterator to the following instr...
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, const TargetInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const unsigned Reg, const Align Alignment, const bool MustBeSingleInstruction)
Emit an instruction sequence that will align the address in register Reg by zero-ing out the lower bi...
static uint32_t alignToARMConstant(uint32_t Value)
Get the minimum constant for ARM that is greater than or equal to the argument.
static void checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs)
static void insertSEHRange(MachineBasicBlock &MBB, MachineBasicBlock::iterator Start, const MachineBasicBlock::iterator &End, const ARMBaseInstrInfo &TII, unsigned MIFlags)
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF, const ARMBaseInstrInfo &TII)
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI, const TargetInstrInfo &TII, unsigned Flags)
static bool canSpillOnFrameIndexAccess(const MachineFunction &MF, const TargetFrameLowering &TFI)
static bool requiresAAPCSFrameRecord(const MachineFunction &MF)
static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes)
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI, const MachineFunction &MF)
We need the offset of the frame pointer relative to other MachineFrameInfo offsets which are encoded ...
static MachineBasicBlock::iterator initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI)
static int sizeOfSPAdjustment(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
static cl::opt< bool > SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), cl::desc("Align ARM NEON spills in prolog and epilog"))
static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg, unsigned SrcReg, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
LLVMContext & Context
const char LLVMTargetMachineRef TM
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
@ Flags
Definition: TextStubV5.cpp:93
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:467
Value * RHS
Value * LHS
static const unsigned FramePtr
bool hasBasePointer(const MachineFunction &MF) const
virtual void emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred=ARMCC::AL, Register PredReg=Register(), unsigned MIFlags=MachineInstr::NoFlags) const
emitLoadConstPool - Emits a load from constpool to materialize the specified immediate.
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
bool cannotEliminateFrame(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
bool canRealignStack(const MachineFunction &MF) const override
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - Provide a base+offset reference to an FI slot for debug info.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
ARMFrameLowering(const ARMSubtarget &sti)
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
bool keepFramePointer(const MachineFunction &MF) const override
Return true if the target wants to keep the frame pointer regardless of the function attribute "frame...
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool isFPReserved(const MachineFunction &MF) const
isFPReserved - Return true if the frame pointer register should be considered a reserved register on ...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override
Adjust the prologue to have the function use segmented stacks.
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override
getCalleeSavedSpillSlots - This method returns a pointer to an array of pairs, that contains an entry...
int ResolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, int SPAdj) const
void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const override
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
const ARMSubtarget & STI
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
bool enableCalleeSaveSkip(const MachineFunction &MF) const override
Returns true if the target can safely skip saving callee-saved registers for noreturn nounwind functi...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
void setDPRCalleeSavedAreaSize(unsigned s)
unsigned getFPCXTSaveAreaSize() const
unsigned getGPRCalleeSavedArea1Size() const
unsigned getDPRCalleeSavedGapSize() const
void setGPRCalleeSavedArea2Size(unsigned s)
void setDPRCalleeSavedAreaOffset(unsigned o)
void setFramePtrSpillOffset(unsigned o)
unsigned getGPRCalleeSavedArea2Size() const
unsigned getNumAlignedDPRCS2Regs() const
void setGPRCalleeSavedArea1Size(unsigned s)
unsigned getArgumentStackToRestore() const
void setFPCXTSaveAreaSize(unsigned s)
unsigned getDPRCalleeSavedAreaSize() const
unsigned getFramePtrSpillOffset() const
unsigned getArgRegsSaveSize() const
void setGPRCalleeSavedArea2Offset(unsigned o)
void setGPRCalleeSavedArea1Offset(unsigned o)
void setDPRCalleeSavedGapSize(unsigned s)
unsigned getArgumentStackSize() const
unsigned getReturnRegsCount() const
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:262
bool isTargetWindows() const
Definition: ARMSubtarget.h:369
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:274
bool splitFramePushPop(const MachineFunction &MF) const
Returns true if the frame setup is split into two separate pushes (first r0-r7,lr then r8-r11),...
Definition: ARMSubtarget.h:442
bool splitFramePointerPush(const MachineFunction &MF) const
bool isTargetELF() const
Definition: ARMSubtarget.h:372
const ARMFrameLowering * getFrameLowering() const override
Definition: ARMSubtarget.h:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
A debug info location.
Definition: DebugLoc.h:33
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:237
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:319
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:187
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:644
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:797
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset)
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:547
static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register)
.cfi_same_value Current value of Register is the same as in the previous frame.
Definition: MCDwarf.h:616
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register)
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:540
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset)
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:533
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset)
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:571
Context object for machine code objects.
Definition: MCContext.h:76
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
iterator_range< livein_iterator > liveins() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Given a machine basic block that branched to 'Old', change the code and CFG so that it branches to 'N...
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool isReturnAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a call to @llvm.returnaddress in this function.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a call to @llvm.frameaddress in this function.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the default stack alignment provided by the target.
int getStackProtectorIndex() const
Return the index for the stack protector object.
int getOffsetAdjustment() const
Return the correction for frame offsets.
void setOffsetAdjustment(int Adj)
Set the correction for frame offsets.
BitVector getPristineRegs(const MachineFunction &MF) const
Return a set of physical registers that are pristine.
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to the callee-saved info vector for the current function.
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
Representation of each machine instruction.
Definition: MachineInstr.h:68
bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
This class contains meta information specific to a module.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition: ArrayRef.h:305
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
typename SuperClass::iterator iterator
Definition: SmallVector.h:581
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:36
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:52
Information about stack frame layout on the target.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligned on entry to a function.
int alignSPAdjust(int SPAdj) const
alignSPAdjust - This method aligns the stack adjustment to the correct alignment.
virtual StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const
getFrameIndexReference - This method should return the base register and offset used to reference a frame index location.
TargetInstrInfo - Interface to description of machine instruction set.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disabled for the given machine function.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
bool hasStackRealignment(const MachineFunction &MF) const
True if stack realignment is required and still possible.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
LLVM Value Representation.
Definition: Value.h:74
self_iterator getIterator()
Definition: ilist_node.h:82
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1839
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isSplitFPArea1Register(unsigned Reg, bool SplitFramePushPop)
static bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1826
static bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop)
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the stack by an additional NumBytes.
static bool isARMLowRegister(unsigned Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1744
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range Range.
Definition: STLExtras.h:2011
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1846
static bool isARMArea1Register(unsigned Reg, bool SplitFramePushPop)
isARMArea1Register - Returns true if the register is a low register (r0-r7) or a stack/pc register that we should push/pop.
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materialize a destreg = basereg + immediate in ARM / Thumb2 code.
static bool isSplitFPArea2Register(unsigned Reg, bool SplitFramePushPop)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85