LLVM 20.0.0git
ARMFrameLowering.cpp
Go to the documentation of this file.
1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
13// This file contains the ARM implementation of TargetFrameLowering class.
14//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function so that the particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
25// | | Higher address
26// |-----------------------------------|
27// | |
28// | arguments passed on the stack |
29// | |
30// |-----------------------------------| <- sp
31// | | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) isn't created until the main
36// function body, after the prologue has run. However, it's depicted here
37// for completeness.
38//
39// | | Higher address
40// |-----------------------------------|
41// | |
42// | arguments passed on the stack |
43// | |
44// |-----------------------------------| <- (sp at function entry)
45// | |
46// | varargs from registers |
47// | |
48// |-----------------------------------|
49// | |
50// | prev_lr |
51// | prev_fp |
52// | (a.k.a. "frame record") |
53// | |
54// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55// | |
56// | callee-saved gpr registers |
57// | |
58// |-----------------------------------|
59// | |
60// | callee-saved fp/simd regs |
61// | |
62// |-----------------------------------|
63// |.empty.space.to.make.part.below....|
64// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65// |.the.standard.8-byte.alignment.....| compile time; if present)
66// |-----------------------------------|
67// | |
68// | local variables of fixed size |
69// | including spill slots |
70// |-----------------------------------| <- base pointer (not defined by ABI,
71// |.variable-sized.local.variables....| LLVM chooses r6)
72// |.(VLAs)............................| (size of this area is unknown at
73// |...................................| compile time)
74// |-----------------------------------| <- sp
75// | | Lower address
76//
77//
78// To access data in a frame, a constant offset from one of the pointers
79// (fp, bp, sp) must be computable at compile time. The size of the areas
80// with a dotted background cannot be computed at compile time if they are
81// present, so all three of fp, bp and sp must be set up to be able to
82// access all contents in the frame areas, assuming all of the frame areas
83// are non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
115#include "ARMSubtarget.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCContext.h"
144#include "llvm/MC/MCDwarf.h"
145#include "llvm/MC/MCInstrDesc.h"
147#include "llvm/Support/CodeGen.h"
150#include "llvm/Support/Debug.h"
156#include <algorithm>
157#include <cassert>
158#include <cstddef>
159#include <cstdint>
160#include <iterator>
161#include <utility>
162#include <vector>
163
164#define DEBUG_TYPE "arm-frame-lowering"
165
166using namespace llvm;
167
// Hidden boolean command-line option "align-neon-spills"; initialized to true.
168static cl::opt<bool>
169SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
170 cl::desc("Align ARM NEON spills in prolog and epilog"));
171
174 unsigned NumAlignedDPRCS2Regs);
175
177 : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
178 STI(sti) {}
179
// NOTE(review): the signature line of this function is absent from this
// listing; only the body is visible.
181 // iOS always has a FP for backtracking, force other targets to keep their FP
182 // when doing FastISel. The emitted code is currently superior, and in cases
183 // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
 // The result is exactly whether the ARM subtarget reports FastISel in use.
184 return MF.getSubtarget<ARMSubtarget>().useFastISel();
185}
186
187/// Returns true if the target can safely skip saving callee-saved registers
188/// for noreturn nounwind functions.
///
/// The function under consideration is expected to carry the NoReturn and
/// NoUnwind attributes and not carry UWTable; this is only checked by an
/// assert. The visible body always returns true.
/// NOTE(review): the signature line is absent from this listing.
190 assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
191 MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
192 !MF.getFunction().hasFnAttribute(Attribute::UWTable));
193
194 // Frame pointer and link register are not treated as normal CSR, thus we
195 // can always skip CSR saves for nonreturning functions.
196 return true;
197}
198
199/// hasFP - Return true if the specified function should have a dedicated frame
200/// pointer register. This is true if the function has variable sized allocas
201/// or if frame pointer elimination is disabled.
///
/// As the final return below shows, it is also true when the stack needs
/// realignment or when the frame address is taken.
/// NOTE(review): the signature line and the condition guarding the first
/// `return true` are absent from this listing.
203 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
204 const MachineFrameInfo &MFI = MF.getFrameInfo();
205
206 // ABI-required frame pointer.
208 return true;
209
210 // Frame pointer required for use within this function.
211 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
212 MFI.isFrameAddressTaken());
213}
214
215/// isFPReserved - Return true if the frame pointer register should be
216/// considered a reserved register on the scope of the specified function.
///
/// That is the case when the function has a frame pointer (hasFP) or when the
/// target options report the frame pointer as reserved for this function.
/// NOTE(review): the signature line is absent from this listing.
218 return hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF);
219}
220
221/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
222/// not required, we reserve argument space for call sites in the function
223/// immediately on entry to the current function. This eliminates the need for
224/// add/sub sp brackets around call sites. Returns true if the call frame is
225/// included as part of the stack frame.
///
/// Returns false when the maximum call frame size reaches the threshold
/// checked below or when the function has variable-sized objects.
/// NOTE(review): the signature line is absent from this listing.
227 const MachineFrameInfo &MFI = MF.getFrameInfo();
228 unsigned CFSize = MFI.getMaxCallFrameSize();
229 // It's not always a good idea to include the call frame as part of the
230 // stack frame. ARM (especially Thumb) has small immediate offset to
231 // address the stack frame. So a large call frame can cause poor codegen
232 // and may even make it impossible to scavenge a register.
 // The threshold evaluates to 2047 (4095 / 2 using integer division).
233 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
234 return false;
235
236 return !MFI.hasVarSizedObjects();
237}
238
239/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
240/// call frame pseudos can be simplified. Unlike most targets, having a FP
241/// is not sufficient here since we still may reference some objects via SP
242/// even when FP is available in Thumb2 mode.
243bool
246}
247
248// Returns how much of the incoming argument stack area we should clean up in an
249// epilogue. For the C calling convention this will be 0, for guaranteed tail
250// call conventions it can be positive (a normal return or a tail call to a
251// function that uses less stack space for arguments) or negative (for a tail
252// call to a function that needs more stack space than us for arguments).
//
// NOTE(review): the signature lines and the line that defines AFI are absent
// from this listing.
256 bool IsTailCallReturn = false;
 // Only inspect the instruction when MBBI is not the end iterator; a tail-call
 // return is recognized purely by its opcode.
257 if (MBB.end() != MBBI) {
258 unsigned RetOpcode = MBBI->getOpcode();
259 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
260 RetOpcode == ARM::TCRETURNri ||
261 RetOpcode == ARM::TCRETURNrinotr12;
262 }
264
265 int ArgumentPopSize = 0;
266 if (IsTailCallReturn) {
 // The stack adjustment is carried as operand 1 of the tail-call return.
267 MachineOperand &StackAdjust = MBBI->getOperand(1);
268
269 // For a tail-call in a callee-pops-arguments environment, some or all of
270 // the stack may actually be in use for the call's arguments, this is
271 // calculated during LowerCall and consumed here...
272 ArgumentPopSize = StackAdjust.getImm();
273 } else {
274 // ... otherwise the amount to pop is *all* of the argument space,
275 // conveniently stored in the MachineFunctionInfo by
276 // LowerFormalArguments. This will, of course, be zero for the C calling
277 // convention.
278 ArgumentPopSize = AFI->getArgumentStackToRestore();
279 }
280
281 return ArgumentPopSize;
282}
283
284static bool needsWinCFI(const MachineFunction &MF) {
285 const Function &F = MF.getFunction();
286 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
287 F.needsUnwindTableEntry();
288}
289
290// Given a load or a store instruction, generate an appropriate unwinding SEH
291// code on Windows.
//
// The SEH pseudo instruction is built with the caller's Flags plus NoMerge and
// inserted directly after MBBI; the iterator returned by that insertion is the
// result. For some opcodes the instruction at MBBI is first replaced by a
// narrow equivalent (see the t2MOVi16 and LDM/STM cases). Opcodes without a
// mapping abort via report_fatal_error.
// NOTE(review): the first signature line and the declarations of MBB and MIB
// are absent from this listing.
293 const TargetInstrInfo &TII,
294 unsigned Flags) {
295 unsigned Opc = MBBI->getOpcode();
297 MachineFunction &MF = *MBB->getParent();
298 DebugLoc DL = MBBI->getDebugLoc();
300 const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
301 const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
302
303 Flags |= MachineInstr::NoMerge;
304
305 switch (Opc) {
306 default:
307 report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
308 break;
309 case ARM::t2ADDri: // add.w r11, sp, #xx
310 case ARM::t2ADDri12: // add.w r11, sp, #xx
311 case ARM::t2MOVTi16: // movt r4, #xx
312 case ARM::tBL: // bl __chkstk
313 // These are harmless if used for just setting up a frame pointer,
314 // but that frame pointer can't be relied upon for unwinding, unless
315 // set up with SEH_SaveSP.
316 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
317 .addImm(/*Wide=*/1)
318 .setMIFlags(Flags);
319 break;
320
321 case ARM::t2MOVi16: { // mov(w) r4, #xx
 // Immediates below 256 are rewritten to tMOVi8: a new instruction is built
 // from the original operands, inserted after MBBI, and the original is
 // erased. MBBI then refers to the replacement.
322 bool Wide = MBBI->getOperand(1).getImm() >= 256;
323 if (!Wide) {
324 MachineInstrBuilder NewInstr =
325 BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
326 NewInstr.add(MBBI->getOperand(0));
327 NewInstr.add(t1CondCodeOp(/*isDead=*/true));
328 for (MachineOperand &MO : llvm::drop_begin(MBBI->operands()))
329 NewInstr.add(MO);
330 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
331 MBB->erase(MBBI);
332 MBBI = NewMBBI;
333 }
334 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
335 break;
336 }
337
338 case ARM::tBLXr: // blx r12 (__chkstk)
339 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
340 .addImm(/*Wide=*/0)
341 .setMIFlags(Flags);
342 break;
343
344 case ARM::t2MOVi32imm: // movw+movt
345 // This pseudo instruction expands into two mov instructions. If the
346 // second operand is a symbol reference, this will stay as two wide
347 // instructions, movw+movt. If they're immediates, the first one can
348 // end up as a narrow mov though.
349 // As two SEH instructions are appended here, they won't get interleaved
350 // between the two final movw/movt instructions, but it doesn't make any
351 // practical difference.
352 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
353 .addImm(/*Wide=*/1)
354 .setMIFlags(Flags);
355 MBB->insertAfter(MBBI, MIB);
356 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
357 .addImm(/*Wide=*/1)
358 .setMIFlags(Flags);
359 break;
360
361 case ARM::t2STR_PRE:
 // Only the form with SP in operands 0 and 2 and an immediate of -4 is
 // mapped; it becomes a SaveRegs with a single-bit mask for operand 1.
362 if (MBBI->getOperand(0).getReg() == ARM::SP &&
363 MBBI->getOperand(2).getReg() == ARM::SP &&
364 MBBI->getOperand(3).getImm() == -4) {
365 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
366 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
367 .addImm(1ULL << Reg)
368 .addImm(/*Wide=*/1)
369 .setMIFlags(Flags);
370 } else {
371 report_fatal_error("No matching SEH Opcode for t2STR_PRE");
372 }
373 break;
374
375 case ARM::t2LDR_POST:
 // Mirror of the t2STR_PRE case: SP in operands 1 and 2, immediate of 4,
 // and the register taken from operand 0.
376 if (MBBI->getOperand(1).getReg() == ARM::SP &&
377 MBBI->getOperand(2).getReg() == ARM::SP &&
378 MBBI->getOperand(3).getImm() == 4) {
379 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
380 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
381 .addImm(1ULL << Reg)
382 .addImm(/*Wide=*/1)
383 .setMIFlags(Flags);
384 } else {
385 report_fatal_error("No matching SEH Opcode for t2LDR_POST");
386 }
387 break;
388
389 case ARM::t2LDMIA_RET:
390 case ARM::t2LDMIA_UPD:
391 case ARM::t2STMDB_UPD: {
 // Build a bit mask of the explicit register operands (from index 4 on).
 // SEH register 15 is recorded as 14. The instruction counts as wide when
 // any register in 8..13 is present, or when a t2LDMIA_UPD includes 14.
392 unsigned Mask = 0;
393 bool Wide = false;
394 for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
395 const MachineOperand &MO = MBBI->getOperand(i);
396 if (!MO.isReg() || MO.isImplicit())
397 continue;
398 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
399 if (Reg == 15)
400 Reg = 14;
401 if (Reg >= 8 && Reg <= 13)
402 Wide = true;
403 else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
404 Wide = true;
405 Mask |= 1 << Reg;
406 }
 // When not wide, replace the instruction with tPOP_RET / tPOP / tPUSH,
 // copying the original operands from index 2 on.
407 if (!Wide) {
408 unsigned NewOpc;
409 switch (Opc) {
410 case ARM::t2LDMIA_RET:
411 NewOpc = ARM::tPOP_RET;
412 break;
413 case ARM::t2LDMIA_UPD:
414 NewOpc = ARM::tPOP;
415 break;
416 case ARM::t2STMDB_UPD:
417 NewOpc = ARM::tPUSH;
418 break;
419 default:
421 }
422 MachineInstrBuilder NewInstr =
423 BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
424 for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
425 NewInstr.add(MBBI->getOperand(i));
426 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
427 MBB->erase(MBBI);
428 MBBI = NewMBBI;
429 }
430 unsigned SEHOpc =
431 (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
432 MIB = BuildMI(MF, DL, TII.get(SEHOpc))
433 .addImm(Mask)
434 .addImm(Wide ? 1 : 0)
435 .setMIFlags(Flags);
436 break;
437 }
438 case ARM::VSTMDDB_UPD:
439 case ARM::VLDMDIA_UPD: {
 // Record the SEH numbers of the first and last register operands
 // (operands from index 4 on).
440 int First = -1, Last = 0;
441 for (const MachineOperand &MO : llvm::drop_begin(MBBI->operands(), 4)) {
442 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
443 if (First == -1)
444 First = Reg;
445 Last = Reg;
446 }
447 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
448 .addImm(First)
449 .addImm(Last)
450 .setMIFlags(Flags);
451 break;
452 }
453 case ARM::tSUBspi:
454 case ARM::tADDspi:
 // The immediate of these opcodes is multiplied by 4 for the SEH code.
455 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
456 .addImm(MBBI->getOperand(2).getImm() * 4)
457 .addImm(/*Wide=*/0)
458 .setMIFlags(Flags);
459 break;
460 case ARM::t2SUBspImm:
461 case ARM::t2SUBspImm12:
462 case ARM::t2ADDspImm:
463 case ARM::t2ADDspImm12:
464 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
465 .addImm(MBBI->getOperand(2).getImm())
466 .addImm(/*Wide=*/1)
467 .setMIFlags(Flags);
468 break;
469
470 case ARM::tMOVr:
 // A move from SP during frame setup, or a move into SP during frame
 // destroy, maps to SEH_SaveSP with the other register; any other move is
 // a fatal error.
471 if (MBBI->getOperand(1).getReg() == ARM::SP &&
472 (Flags & MachineInstr::FrameSetup)) {
473 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
474 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
475 .addImm(Reg)
476 .setMIFlags(Flags);
477 } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
478 (Flags & MachineInstr::FrameDestroy)) {
479 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
480 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
481 .addImm(Reg)
482 .setMIFlags(Flags);
483 } else {
484 report_fatal_error("No SEH Opcode for MOV");
485 }
486 break;
487
488 case ARM::tBX_RET:
489 case ARM::TCRETURNri:
490 case ARM::TCRETURNrinotr12:
491 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
492 .addImm(/*Wide=*/0)
493 .setMIFlags(Flags);
494 break;
495
496 case ARM::TCRETURNdi:
497 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
498 .addImm(/*Wide=*/1)
499 .setMIFlags(Flags);
500 break;
501 }
 // Place the SEH instruction right after the (possibly replaced) instruction.
502 return MBB->insertAfter(MBBI, MIB);
503}
504
// NOTE(review): the signature and the statement executed when MBBI is the
// block's begin iterator are absent from this listing.
507 if (MBBI == MBB.begin())
 // Otherwise the result is the iterator immediately preceding MBBI.
509 return std::prev(MBBI);
510}
511
// Walks the instructions after Start (or from the beginning of MBB when Start
// is not valid) up to End and calls insertSEH on each one that is not already
// followed by an SEH instruction.
// NOTE(review): the leading signature lines are absent from this listing.
515 const ARMBaseInstrInfo &TII, unsigned MIFlags) {
516 if (Start.isValid())
517 Start = std::next(Start);
518 else
519 Start = MBB.begin();
520
521 for (auto MI = Start; MI != End;) {
 // Capture the successor before any insertion happens after MI.
522 auto Next = std::next(MI);
523 // Check if this instruction already has got a SEH opcode added. In that
524 // case, don't do this generic mapping.
525 if (Next != End && isSEHInstruction(*Next)) {
 // Skip MI, its SEH instruction, and any further consecutive SEH
 // instructions.
526 MI = std::next(Next);
527 while (MI != End && isSEHInstruction(*MI))
528 ++MI;
529 continue;
530 }
531 insertSEH(MI, TII, MIFlags);
532 MI = Next;
533 }
534}
535
// Dispatches to emitARMRegPlusImmediate when isARM is set and to
// emitT2RegPlusImmediate otherwise, forwarding all arguments unchanged.
// Defaults: MIFlags = NoFlags, Pred = ARMCC::AL, PredReg = 0.
// NOTE(review): the leading signature lines are absent from this listing.
538 const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
539 unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
540 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
541 if (isARM)
542 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
543 Pred, PredReg, TII, MIFlags);
544 else
545 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
546 Pred, PredReg, TII, MIFlags);
547}
548
// Convenience wrapper around emitRegPlusImmediate that uses SP as both the
// destination and the source register.
// NOTE(review): two parameter lines of the signature are absent from this
// listing.
549static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
551 const ARMBaseInstrInfo &TII, int NumBytes,
552 unsigned MIFlags = MachineInstr::NoFlags,
554 unsigned PredReg = 0) {
555 emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
556 MIFlags, Pred, PredReg);
557}
558
// Computes a byte count for a push-like instruction MI from its opcode and
// operand count. Opcodes not listed below hit llvm_unreachable.
// NOTE(review): the signature line is absent from this listing.
560 int RegSize;
561 switch (MI.getOpcode()) {
562 case ARM::VSTMDDB_UPD:
 // 8 bytes are counted per register operand.
563 RegSize = 8;
564 break;
565 case ARM::STMDB_UPD:
566 case ARM::t2STMDB_UPD:
 // 4 bytes are counted per register operand.
567 RegSize = 4;
568 break;
569 case ARM::t2STR_PRE:
570 case ARM::STR_PRE_IMM:
 // These opcodes always yield 4, independent of the operand list.
571 return 4;
572 default:
573 llvm_unreachable("Unknown push or pop like instruction");
574 }
575
576 int count = 0;
577 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
578 // pred) so the list starts at 4.
 // One RegSize is added for every operand at index 4 or above.
579 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
580 count += RegSize;
581 return count;
582}
583
// Returns true when StackSizeInBytes is at least the stack probe size and the
// function does not carry the "no-stack-arg-probe" attribute.
// NOTE(review): the first signature line is absent from this listing.
585 size_t StackSizeInBytes) {
586 const MachineFrameInfo &MFI = MF.getFrameInfo();
587 const Function &F = MF.getFunction();
 // Default probe size: 4080 when the stack protector index is greater than
 // zero, 4096 otherwise.
588 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
589
 // The "stack-probe-size" function attribute is parsed here, with the value
 // computed above passed as the default.
590 StackProbeSize =
591 F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
592 return (StackSizeInBytes >= StackProbeSize) &&
593 !F.hasFnAttribute("no-stack-arg-probe");
594}
595
596namespace {
597
// Records instructions that adjust SP, together with the number of bytes each
// one adjusts by, so that CFA-offset CFI instructions can be emitted for them
// afterwards.
// NOTE(review): some member declarations (the iterator field of InstInfo and
// the Insts container) are absent from this listing.
598struct StackAdjustingInsts {
599 struct InstInfo {
 // Bytes of SP adjustment attributed to the instruction.
601 unsigned SPAdjust;
 // Consulted by emitDefCFAOffsets when the function has a frame pointer.
602 bool BeforeFPSet;
603 };
604
606
 // Appends a record for instruction I.
607 void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
608 bool BeforeFPSet = false) {
609 InstInfo Info = {I, SPAdjust, BeforeFPSet};
610 Insts.push_back(Info);
611 }
612
 // Adds ExtraBytes to the adjustment of the already-recorded instruction I;
 // asserts that I was recorded before.
613 void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
614 auto Info =
615 llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
616 assert(Info != Insts.end() && "invalid sp adjusting instruction");
617 Info->SPAdjust += ExtraBytes;
618 }
619
 // Walks the records in insertion order, accumulating the adjustments into a
 // running CFA offset and inserting a CFI_INSTRUCTION (cfiDefCfaOffset) right
 // after each recorded instruction. When HasFP is set, stops at the first
 // record that is not marked BeforeFPSet.
620 void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
621 const ARMBaseInstrInfo &TII, bool HasFP) {
623 unsigned CFAOffset = 0;
624 for (auto &Info : Insts) {
625 if (HasFP && !Info.BeforeFPSet)
626 return;
627
628 CFAOffset += Info.SPAdjust;
629 unsigned CFIIndex = MF.addFrameInst(
630 MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
631 BuildMI(MBB, std::next(Info.I), dl,
632 TII.get(TargetOpcode::CFI_INSTRUCTION))
633 .addCFIIndex(CFIIndex)
635 }
636 }
637};
638
639} // end anonymous namespace
640
641/// Emit an instruction sequence that will align the address in
642/// register Reg by zero-ing out the lower bits. For versions of the
643/// architecture that support Neon, this must be done in a single
644/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
645/// single instruction. That function only gets called when optimizing
646/// spilling of D registers on a core with the Neon instruction set
647/// present.
///
/// Thumb1-only functions are rejected by an assert. For non-Thumb functions
/// the sequence is BFC, BICri, or a pair of MOVsi shifts (lsr then lsl); for
/// Thumb functions it is always t2BFC.
/// NOTE(review): parts of the signature and several operand lines of the
/// BuildMI chains are absent from this listing.
649 const TargetInstrInfo &TII,
652 const DebugLoc &DL, const unsigned Reg,
653 const Align Alignment,
654 const bool MustBeSingleInstruction) {
655 const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
656 const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
 // AlignMask is the alignment value minus one; NrBitsToZero is Log2 of the
 // alignment.
657 const unsigned AlignMask = Alignment.value() - 1U;
658 const unsigned NrBitsToZero = Log2(Alignment);
659 assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
660 if (!AFI->isThumbFunction()) {
661 // if the BFC instruction is available, use that to zero the lower
662 // bits:
663 // bfc Reg, #0, log2(Alignment)
664 // otherwise use BIC, if the mask to zero the required number of bits
665 // can be encoded in the bic immediate field
666 // bic Reg, Reg, Alignment-1
667 // otherwise, emit
668 // lsr Reg, Reg, log2(Alignment)
669 // lsl Reg, Reg, log2(Alignment)
670 if (CanUseBFC) {
671 BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
673 .addImm(~AlignMask)
 // The BIC form is chosen only when the mask is at most 255.
675 } else if (AlignMask <= 255) {
676 BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
678 .addImm(AlignMask)
680 .add(condCodeOp());
681 } else {
 // Two instructions are needed here, so a caller demanding a single
 // instruction trips the assert.
682 assert(!MustBeSingleInstruction &&
683 "Shouldn't call emitAligningInstructions demanding a single "
684 "instruction to be emitted for large stack alignment for a target "
685 "without BFC.");
686 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
688 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
690 .add(condCodeOp());
691 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
693 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
695 .add(condCodeOp());
696 }
697 } else {
698 // Since this is only reached for Thumb-2 targets, the BFC instruction
699 // should always be available.
700 assert(CanUseBFC);
701 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
703 .addImm(~AlignMask)
705 }
706}
707
708/// We need the offset of the frame pointer relative to other MachineFrameInfo
709/// offsets which are encoded relative to SP at function begin.
710/// See also emitPrologue() for how the FP is set up.
711/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
712/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
713/// this to produce a conservative estimate that we check in an assert() later.
714static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
715 const MachineFunction &MF) {
716 // For Thumb1, push.w isn't available, so the first push will always push
717 // r7 and lr onto the stack first.
718 if (AFI.isThumb1OnlyFunction())
719 return -AFI.getArgRegsSaveSize() - (2 * 4);
720 // This is a conservative estimation: Assume the frame pointer being r7 and
721 // pc("r15") up to r8 getting spilled before (= 8 registers).
722 int MaxRegBytes = 8 * 4;
723 if (STI.splitFramePointerPush(MF)) {
724 // Here, r11 can be stored below all of r4-r15 (3 registers more than
725 // above), plus d8-d15.
726 MaxRegBytes = 11 * 4 + 8 * 8;
727 }
728 int FPCXTSaveSize =
729 (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
730 return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
731}
732
734 MachineBasicBlock &MBB) const {
736 MachineFrameInfo &MFI = MF.getFrameInfo();
738 MCContext &Context = MF.getContext();
739 const TargetMachine &TM = MF.getTarget();
740 const MCRegisterInfo *MRI = Context.getRegisterInfo();
741 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
744 "This emitPrologue does not support Thumb1!");
745 bool isARM = !AFI->isThumbFunction();
746 Align Alignment = STI.getFrameLowering()->getStackAlign();
747 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
748 unsigned NumBytes = MFI.getStackSize();
749 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
750 int FPCXTSaveSize = 0;
751 bool NeedsWinCFI = needsWinCFI(MF);
752
753 // Debug location must be unknown since the first debug location is used
754 // to determine the end of the prologue.
755 DebugLoc dl;
756
757 Register FramePtr = RegInfo->getFrameRegister(MF);
758
759 // Determine the sizes of each callee-save spill areas and record which frame
760 // belongs to which callee-save spill areas.
761 unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
762 int FramePtrSpillFI = 0;
763 int D8SpillFI = 0;
764
765 // All calls are tail calls in GHC calling conv, and functions have no
766 // prologue/epilogue.
768 return;
769
770 StackAdjustingInsts DefCFAOffsetCandidates;
771 bool HasFP = hasFP(MF);
772
773 if (!AFI->hasStackFrame() &&
774 (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
775 if (NumBytes != 0) {
776 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
778 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
779 }
780 if (!NeedsWinCFI)
781 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
782 if (NeedsWinCFI && MBBI != MBB.begin()) {
784 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
786 MF.setHasWinCFI(true);
787 }
788 return;
789 }
790
791 // Determine spill area sizes.
792 if (STI.splitFramePointerPush(MF)) {
793 for (const CalleeSavedInfo &I : CSI) {
794 Register Reg = I.getReg();
795 int FI = I.getFrameIdx();
796 switch (Reg) {
797 case ARM::R11:
798 case ARM::LR:
799 if (Reg == FramePtr)
800 FramePtrSpillFI = FI;
801 GPRCS2Size += 4;
802 break;
803 case ARM::R0:
804 case ARM::R1:
805 case ARM::R2:
806 case ARM::R3:
807 case ARM::R4:
808 case ARM::R5:
809 case ARM::R6:
810 case ARM::R7:
811 case ARM::R8:
812 case ARM::R9:
813 case ARM::R10:
814 case ARM::R12:
815 GPRCS1Size += 4;
816 break;
817 case ARM::FPCXTNS:
818 FPCXTSaveSize = 4;
819 break;
820 default:
821 // This is a DPR. Exclude the aligned DPRCS2 spills.
822 if (Reg == ARM::D8)
823 D8SpillFI = FI;
824 if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
825 DPRCSSize += 8;
826 }
827 }
828 } else {
829 for (const CalleeSavedInfo &I : CSI) {
830 Register Reg = I.getReg();
831 int FI = I.getFrameIdx();
832 switch (Reg) {
833 case ARM::R8:
834 case ARM::R9:
835 case ARM::R10:
836 case ARM::R11:
837 case ARM::R12:
838 if (STI.splitFramePushPop(MF)) {
839 GPRCS2Size += 4;
840 break;
841 }
842 [[fallthrough]];
843 case ARM::R0:
844 case ARM::R1:
845 case ARM::R2:
846 case ARM::R3:
847 case ARM::R4:
848 case ARM::R5:
849 case ARM::R6:
850 case ARM::R7:
851 case ARM::LR:
852 if (Reg == FramePtr)
853 FramePtrSpillFI = FI;
854 GPRCS1Size += 4;
855 break;
856 case ARM::FPCXTNS:
857 FPCXTSaveSize = 4;
858 break;
859 default:
860 // This is a DPR. Exclude the aligned DPRCS2 spills.
861 if (Reg == ARM::D8)
862 D8SpillFI = FI;
863 if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
864 DPRCSSize += 8;
865 }
866 }
867 }
868
869 MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
870
871 // Move past the PAC computation.
872 if (AFI->shouldSignReturnAddress())
873 LastPush = MBBI++;
874
875 // Move past FPCXT area.
876 if (FPCXTSaveSize > 0) {
877 LastPush = MBBI++;
878 DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
879 }
880
881 // Allocate the vararg register save area.
882 if (ArgRegsSaveSize) {
883 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
885 LastPush = std::prev(MBBI);
886 DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
887 }
888
889 // Move past area 1.
890 if (GPRCS1Size > 0) {
891 GPRCS1Push = LastPush = MBBI++;
892 DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
893 }
894
895 // Determine starting offsets of spill areas.
896 unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
897 unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
898 unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
899 Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
900 unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
901 if (!STI.splitFramePointerPush(MF)) {
902 DPRGapSize += GPRCS2Size;
903 }
904 DPRGapSize %= DPRAlign.value();
905
906 unsigned DPRCSOffset;
907 if (STI.splitFramePointerPush(MF)) {
908 DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
909 GPRCS2Offset = DPRCSOffset - GPRCS2Size;
910 } else {
911 DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
912 }
913 int FramePtrOffsetInPush = 0;
914 if (HasFP) {
915 int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
916 assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
917 "Max FP estimation is wrong");
918 FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
919 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
920 NumBytes);
921 }
922 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
923 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
924 AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
925
926 // Move past area 2.
927 if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
928 GPRCS2Push = LastPush = MBBI++;
929 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
930 }
931
932 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
933 // .cfi_offset operations will reflect that.
934 if (DPRGapSize) {
935 assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
936 if (LastPush != MBB.end() &&
937 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
938 DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
939 else {
940 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
942 DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
943 }
944 }
945
946 // Move past area 3.
947 if (DPRCSSize > 0) {
948 // Since vpush register list cannot have gaps, there may be multiple vpush
949 // instructions in the prologue.
950 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
951 DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
952 LastPush = MBBI++;
953 }
954 }
955
956 // Move past the aligned DPRCS2 area.
957 if (AFI->getNumAlignedDPRCS2Regs() > 0) {
959 // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
960 // leaves the stack pointer pointing to the DPRCS2 area.
961 //
962 // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
963 NumBytes += MFI.getObjectOffset(D8SpillFI);
964 } else
965 NumBytes = DPRCSOffset;
966
967 if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
968 GPRCS2Push = LastPush = MBBI++;
969 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
970 }
971
972 bool NeedsWinCFIStackAlloc = NeedsWinCFI;
973 if (STI.splitFramePointerPush(MF) && HasFP)
974 NeedsWinCFIStackAlloc = false;
975
976 if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
977 uint32_t NumWords = NumBytes >> 2;
978
979 if (NumWords < 65536) {
980 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
981 .addImm(NumWords)
984 } else {
985 // Split into two instructions here, instead of using t2MOVi32imm,
986 // to allow inserting accurate SEH instructions (including accurate
987 // instruction size for each of them).
988 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
989 .addImm(NumWords & 0xffff)
992 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
993 .addReg(ARM::R4)
994 .addImm(NumWords >> 16)
997 }
998
999 switch (TM.getCodeModel()) {
1000 case CodeModel::Tiny:
1001 llvm_unreachable("Tiny code model not available on ARM.");
1002 case CodeModel::Small:
1003 case CodeModel::Medium:
1004 case CodeModel::Kernel:
1005 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
1007 .addExternalSymbol("__chkstk")
1008 .addReg(ARM::R4, RegState::Implicit)
1010 break;
1011 case CodeModel::Large:
1012 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
1013 .addExternalSymbol("__chkstk")
1015
1016 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
1018 .addReg(ARM::R12, RegState::Kill)
1019 .addReg(ARM::R4, RegState::Implicit)
1021 break;
1022 }
1023
1024 MachineInstrBuilder Instr, SEH;
1025 Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
1026 .addReg(ARM::SP, RegState::Kill)
1027 .addReg(ARM::R4, RegState::Kill)
1030 .add(condCodeOp());
1031 if (NeedsWinCFIStackAlloc) {
1032 SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
1033 .addImm(NumBytes)
1034 .addImm(/*Wide=*/1)
1036 MBB.insertAfter(Instr, SEH);
1037 }
1038 NumBytes = 0;
1039 }
1040
1041 if (NumBytes) {
1042 // Adjust SP after all the callee-save spills.
1043 if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
1044 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
1045 DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
1046 else {
1047 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
1049 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
1050 }
1051
1052 if (HasFP && isARM)
1053 // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
1054 // Note it's not safe to do this in Thumb2 mode because it would have
1055 // taken two instructions:
1056 // mov sp, r7
1057 // sub sp, #24
1058 // If an interrupt is taken between the two instructions, then sp is in
1059 // an inconsistent state (pointing to the middle of callee-saved area).
1060 // The interrupt handler can end up clobbering the registers.
1061 AFI->setShouldRestoreSPFromFP(true);
1062 }
1063
1064 // Set FP to point to the stack slot that contains the previous FP.
1065 // For iOS, FP is R7, which has now been stored in spill area 1.
1066 // Otherwise, if this is not iOS, all the callee-saved registers go
1067 // into spill area 1, including the FP in R11. In either case, it
1068 // is in area one and the adjustment needs to take place just after
1069 // that push.
1070 // FIXME: The above is not necessarily true when PACBTI is enabled.
1071 // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
1072 // so FP ends up on area two.
1074 if (HasFP) {
1075 AfterPush = std::next(GPRCS1Push);
1076 unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
1077 int FPOffset = PushSize + FramePtrOffsetInPush;
1078 if (STI.splitFramePointerPush(MF)) {
1079 AfterPush = std::next(GPRCS2Push);
1080 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1081 FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
1082 } else {
1083 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1084 FramePtr, ARM::SP, FPOffset,
1086 }
1087 if (!NeedsWinCFI) {
1088 if (FramePtrOffsetInPush + PushSize != 0) {
1089 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
1090 nullptr, MRI->getDwarfRegNum(FramePtr, true),
1091 FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
1092 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1093 .addCFIIndex(CFIIndex)
1095 } else {
1096 unsigned CFIIndex =
1098 nullptr, MRI->getDwarfRegNum(FramePtr, true)));
1099 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1100 .addCFIIndex(CFIIndex)
1102 }
1103 }
1104 }
1105
1106 // Emit a SEH opcode indicating the prologue end. The rest of the prologue
1107 // instructions below don't need to be replayed to unwind the stack.
1108 if (NeedsWinCFI && MBBI != MBB.begin()) {
1110 if (HasFP && STI.splitFramePointerPush(MF))
1111 End = AfterPush;
1113 BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
1115 MF.setHasWinCFI(true);
1116 }
1117
1118 // Now that the prologue's actual instructions are finalised, we can insert
1119 // the necessary DWARF cf instructions to describe the situation. Start by
1120 // recording where each register ended up:
1121 if (GPRCS1Size > 0 && !NeedsWinCFI) {
1122 MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
1123 int CFIIndex;
1124 for (const auto &Entry : CSI) {
1125 Register Reg = Entry.getReg();
1126 int FI = Entry.getFrameIdx();
1127 switch (Reg) {
1128 case ARM::R8:
1129 case ARM::R9:
1130 case ARM::R10:
1131 case ARM::R11:
1132 case ARM::R12:
1133 if (STI.splitFramePushPop(MF))
1134 break;
1135 [[fallthrough]];
1136 case ARM::R0:
1137 case ARM::R1:
1138 case ARM::R2:
1139 case ARM::R3:
1140 case ARM::R4:
1141 case ARM::R5:
1142 case ARM::R6:
1143 case ARM::R7:
1144 case ARM::LR:
1146 nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
1147 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1148 .addCFIIndex(CFIIndex)
1150 break;
1151 }
1152 }
1153 }
1154
1155 if (GPRCS2Size > 0 && !NeedsWinCFI) {
1156 MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
1157 for (const auto &Entry : CSI) {
1158 Register Reg = Entry.getReg();
1159 int FI = Entry.getFrameIdx();
1160 switch (Reg) {
1161 case ARM::R8:
1162 case ARM::R9:
1163 case ARM::R10:
1164 case ARM::R11:
1165 case ARM::R12:
1166 if (STI.splitFramePushPop(MF)) {
1167 unsigned DwarfReg = MRI->getDwarfRegNum(
1168 Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
1169 int64_t Offset = MFI.getObjectOffset(FI);
1170 unsigned CFIIndex = MF.addFrameInst(
1171 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1172 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1173 .addCFIIndex(CFIIndex)
1175 }
1176 break;
1177 }
1178 }
1179 }
1180
1181 if (DPRCSSize > 0 && !NeedsWinCFI) {
1182 // Since vpush register list cannot have gaps, there may be multiple vpush
1183 // instructions in the prologue.
1184 MachineBasicBlock::iterator Pos = std::next(LastPush);
1185 for (const auto &Entry : CSI) {
1186 Register Reg = Entry.getReg();
1187 int FI = Entry.getFrameIdx();
1188 if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
1189 (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
1190 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
1191 int64_t Offset = MFI.getObjectOffset(FI);
1192 unsigned CFIIndex = MF.addFrameInst(
1193 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1194 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1195 .addCFIIndex(CFIIndex)
1197 }
1198 }
1199 }
1200
1201 // Now we can emit descriptions of where the canonical frame address was
1202 // throughout the process. If we have a frame pointer, it takes over the job
1203 // half-way through, so only the first few .cfi_def_cfa_offset instructions
1204 // actually get emitted.
1205 if (!NeedsWinCFI)
1206 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
1207
1208 if (STI.isTargetELF() && hasFP(MF))
1210 AFI->getFramePtrSpillOffset());
1211
1212 AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
1213 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
1214 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
1215 AFI->setDPRCalleeSavedGapSize(DPRGapSize);
1216 AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
1217
1218 // If we need dynamic stack realignment, do it here. Be paranoid and make
1219 // sure if we also have VLAs, we have a base pointer for frame access.
1220 // If aligned NEON registers were spilled, the stack has already been
1221 // realigned.
1222 if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
1223 Align MaxAlign = MFI.getMaxAlign();
1225 if (!AFI->isThumbFunction()) {
1226 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
1227 false);
1228 } else {
1229 // We cannot use sp as source/dest register here, thus we're using r4 to
1230 // perform the calculations. We're emitting the following sequence:
1231 // mov r4, sp
1232 // -- use emitAligningInstructions to produce best sequence to zero
1233 // -- out lower bits in r4
1234 // mov sp, r4
1235 // FIXME: It will be better just to find spare register here.
1236 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
1237 .addReg(ARM::SP, RegState::Kill)
1239 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
1240 false);
1241 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1242 .addReg(ARM::R4, RegState::Kill)
1244 }
1245
1246 AFI->setShouldRestoreSPFromFP(true);
1247 }
1248
1249 // If we need a base pointer, set it up here. It's whatever the value
1250 // of the stack pointer is at this point. Any variable size objects
1251 // will be allocated after this, so we can still use the base pointer
1252 // to reference locals.
1253 // FIXME: Clarify FrameSetup flags here.
1254 if (RegInfo->hasBasePointer(MF)) {
1255 if (isARM)
1256 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
1257 .addReg(ARM::SP)
1259 .add(condCodeOp());
1260 else
1261 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
1262 .addReg(ARM::SP)
1264 }
1265
1266 // If the frame has variable sized objects then the epilogue must restore
1267 // the sp from fp. We can assume there's an FP here since hasFP already
1268 // checks for hasVarSizedObjects.
1269 if (MFI.hasVarSizedObjects())
1270 AFI->setShouldRestoreSPFromFP(true);
1271}
1272
1274 MachineBasicBlock &MBB) const {
// Emits the stack-pointer adjustments of the epilogue for MBB in ARM and
// Thumb2 functions (Thumb1-only functions are rejected by the assert below).
// It positions itself relative to the FrameDestroy instructions already in
// the block, inserts SP updates around them, optionally validates the PAC,
// and brackets the sequence with SEH epilogue markers when the function has
// WinCFI.
// NOTE(review): the listing line numbers embedded in this block are not
// contiguous (e.g. 1275 -> 1277, 1295 -> 1297), so some source lines --
// including the first line of the signature and the declarations of AFI, STI
// and MBBI -- are not visible here. Confirm against the upstream file.
1275 MachineFrameInfo &MFI = MF.getFrameInfo();
1277 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
1278 const ARMBaseInstrInfo &TII =
1279 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1280 assert(!AFI->isThumb1OnlyFunction() &&
1281 "This emitEpilogue does not support Thumb1!");
1282 bool isARM = !AFI->isThumbFunction();
1283
1284 // Amount of stack space we reserved next to incoming args for either
1285 // varargs registers or stack arguments in tail calls made by this function.
1286 unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
1287
1288 // How much of the stack used by incoming arguments this function is expected
1289 // to restore in this particular epilogue.
1290 int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
1291 int NumBytes = (int)MFI.getStackSize();
1292 Register FramePtr = RegInfo->getFrameRegister(MF);
1293
1294 // All calls are tail calls in GHC calling conv, and functions have no
1295 // prologue/epilogue.
// NOTE(review): listing line 1296, presumably the condition guarding this
// early return, is missing from this listing -- as shown the return looks
// unconditional. Confirm against the upstream file.
1297 return;
1298
1299 // First put ourselves on the first (from top) terminator instructions.
1301 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
1302
1303 MachineBasicBlock::iterator RangeStart;
// No stack frame: the only work is a single SP update that undoes the local
// stack allocation plus the incoming argument stack this epilogue restores.
1304 if (!AFI->hasStackFrame()) {
1305 if (MF.hasWinCFI()) {
1306 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1308 RangeStart = initMBBRange(MBB, MBBI);
1309 }
1310
1311 if (NumBytes + IncomingArgStackToRestore != 0)
1312 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1313 NumBytes + IncomingArgStackToRestore,
1315 } else {
1316 // Unwind MBBI to point to first LDR / VLDRD.
1317 if (MBBI != MBB.begin()) {
1318 do {
1319 --MBBI;
1320 } while (MBBI != MBB.begin() &&
// If the instruction the walk stopped on is not flagged FrameDestroy, step
// forward again so MBBI sits on the first frame-destroy instruction.
1322 if (!MBBI->getFlag(MachineInstr::FrameDestroy))
1323 ++MBBI;
1324 }
1325
1326 if (MF.hasWinCFI()) {
1327 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1329 RangeStart = initMBBRange(MBB, MBBI);
1330 }
1331
1332 // Move SP to start of FP callee save spill area.
1333 NumBytes -= (ReservedArgStack +
1334 AFI->getFPCXTSaveAreaSize() +
1339
1340 // Reset SP based on frame pointer only if the stack frame extends beyond
1341 // frame pointer stack slot or target is ELF and the function has FP.
1342 if (AFI->shouldRestoreSPFromFP()) {
// Re-express NumBytes relative to the frame pointer: the new SP is computed
// below as FramePtr - NumBytes.
1343 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
1344 if (NumBytes) {
1345 if (isARM)
1346 emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
1347 ARMCC::AL, 0, TII,
1349 else {
1350 // It's not possible to restore SP from FP in a single instruction.
1351 // For iOS, this looks like:
1352 // mov sp, r7
1353 // sub sp, #24
1354 // This is bad, if an interrupt is taken after the mov, sp is in an
1355 // inconsistent state.
1356 // Use the first callee-saved register as a scratch register.
1357 assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
1358 "No scratch register to restore SP from FP!");
1359 emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
1361 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1362 .addReg(ARM::R4)
1365 }
1366 } else {
// NumBytes == 0: a plain move into SP is emitted (the source operand lines
// are missing from this listing; presumably FramePtr -- TODO confirm).
1367 // Thumb2 or ARM.
1368 if (isARM)
1369 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
1372 .add(condCodeOp())
1374 else
1375 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1379 }
// Not restoring from FP: first try to fold the remaining adjustment into the
// pop at MBBI, and only emit an explicit SP update if that fails.
1380 } else if (NumBytes &&
1381 !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
1382 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
1384
1385 // Increment past our save areas.
1387 MBBI++;
1388
1389 if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
1390 MBBI++;
1391 // Since vpop register list cannot have gaps, there may be multiple vpop
1392 // instructions in the epilogue.
1393 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
1394 MBBI++;
1395 }
// Release the alignment gap recorded for the DPR callee-saved area, if any.
1396 if (AFI->getDPRCalleeSavedGapSize()) {
1397 assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
1398 "unexpected DPR alignment gap");
1399 emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
1401 }
1402
1404 MBBI++;
1405 if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
1406
// Finally give back the reserved argument area together with the incoming
// argument stack this epilogue is responsible for.
1407 if (ReservedArgStack || IncomingArgStackToRestore) {
1408 assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
1409 "attempting to restore negative stack amount");
1410 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1411 ReservedArgStack + IncomingArgStackToRestore,
1413 }
1414
1415 // Validate PAC. It should have been already popped into R12. For CMSE entry
1416 // function, the validation instruction is emitted during expansion of the
1417 // tBXNS_RET, since the validation must use the value of SP at function
1418 // entry, before saving, resp. after restoring, FPCXTNS.
1419 if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
1420 BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
1421 }
1422
// Close the SEH epilogue region opened by SEH_EpilogStart above.
1423 if (MF.hasWinCFI()) {
1425 BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
1427 }
1428}
1429
1430/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1431/// debug info. It's the same as what we use for resolving the code-gen
1432/// references for now. FIXME: This can go wrong when references are
1433/// SP-relative and simple call frames aren't used.
///
/// Forwards to ResolveFrameIndexReference with an SP adjustment of 0 and wraps
/// the resulting integer offset in a fixed StackOffset. FrameReg is an output
/// parameter that receives the base register chosen by that call.
1435 int FI,
1436 Register &FrameReg) const {
1437 return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
1438}
1439
1441 int FI, Register &FrameReg,
1442 int SPAdj) const {
// Chooses a base register for frame index FI and returns the offset of the
// slot from that register. FrameReg is set to SP, the frame register or the
// base register; SPAdj is added to SP-relative offsets only.
// NOTE(review): the first line of the signature and listing line 1445 are
// missing from this listing.
//
// Offset is the object offset rebased by the full stack size; FPOffset is the
// same slot rebased by the frame-pointer spill offset instead.
1443 const MachineFrameInfo &MFI = MF.getFrameInfo();
1444 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1446 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1447 int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
1448 int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1449 bool isFixed = MFI.isFixedObjectIndex(FI);
1450
// Default answer: SP-relative, including the caller-supplied SP adjustment.
1451 FrameReg = ARM::SP;
1452 Offset += SPAdj;
1453
1454 // SP can move around if there are allocas. We may also lose track of SP
1455 // when emergency spilling inside a non-reserved call frame setup.
1456 bool hasMovingSP = !hasReservedCallFrame(MF);
1457
1458 // When dynamically realigning the stack, use the frame pointer for
1459 // parameters, and the stack/base pointer for locals.
1460 if (RegInfo->hasStackRealignment(MF)) {
1461 assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1462 if (isFixed) {
1463 FrameReg = RegInfo->getFrameRegister(MF);
1464 Offset = FPOffset;
1465 } else if (hasMovingSP) {
1466 assert(RegInfo->hasBasePointer(MF) &&
1467 "VLAs and dynamic stack alignment, but missing base pointer!");
1468 FrameReg = RegInfo->getBaseRegister();
// Undo the SPAdj added above; it is not applied to base-pointer offsets.
1469 Offset -= SPAdj;
1470 }
1471 return Offset;
1472 }
1473
1474 // If there is a frame pointer, use it when we can.
1475 if (hasFP(MF) && AFI->hasStackFrame()) {
1476 // Use frame pointer to reference fixed objects. Use it for locals if
1477 // there are VLAs (and thus the SP isn't reliable as a base).
1478 if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1479 FrameReg = RegInfo->getFrameRegister(MF);
1480 return FPOffset;
1481 } else if (hasMovingSP) {
1482 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
1483 if (AFI->isThumb2Function()) {
1484 // Try to use the frame pointer if we can, else use the base pointer
1485 // since it's available. This is handy for the emergency spill slot, in
1486 // particular.
1487 if (FPOffset >= -255 && FPOffset < 0) {
1488 FrameReg = RegInfo->getFrameRegister(MF);
1489 return FPOffset;
1490 }
1491 }
1492 } else if (AFI->isThumbFunction()) {
1493 // Prefer SP to base pointer, if the offset is suitably aligned and in
1494 // range as the effective range of the immediate offset is bigger when
1495 // basing off SP.
1496 // Use add <rd>, sp, #<imm8>
1497 // ldr <rd>, [sp, #<imm8>]
1498 if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1499 return Offset;
1500 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1501 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1502 if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1503 FrameReg = RegInfo->getFrameRegister(MF);
1504 return FPOffset;
1505 }
// ARM mode with a stable SP: compare Offset against |FPOffset|.
1506 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1507 // Otherwise, use SP or FP, whichever is closer to the stack slot.
1508 FrameReg = RegInfo->getFrameRegister(MF);
1509 return FPOffset;
1510 }
1511 }
1512 // Use the base pointer if we have one.
1513 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1514 // That can happen if we forced a base pointer for a large call frame.
1515 if (RegInfo->hasBasePointer(MF)) {
1516 FrameReg = RegInfo->getBaseRegister();
// As above, the base-pointer offset excludes the SP adjustment.
1517 Offset -= SPAdj;
1518 }
1519 return Offset;
1520}
1521
/// Emit the store instructions that spill the callee-saved registers in CSI
/// accepted by the predicate Func, inserting them before MI.
///
/// Registers are gathered by walking CSI from its last entry to its first.
/// Each gathered group is sorted by encoding value and emitted either as one
/// StmOpc instruction (more than one register, or StrOpc == 0) or as a single
/// StrOpc store with an immediate of -4. With NoGap set, a group ends as soon
/// as the register numbers stop being consecutive. D-registers in the range
/// [D8, D8 + NumAlignedDPRCS2Regs) are skipped. MIFlags is applied to every
/// instruction built here.
/// NOTE(review): several lines of this function (part of the parameter list
/// and the declarations of STI, TII, TRI and Regs) are missing from this
/// listing.
1522void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
1525 unsigned StmOpc, unsigned StrOpc,
1526 bool NoGap, bool (*Func)(unsigned, bool),
1527 unsigned NumAlignedDPRCS2Regs,
1528 unsigned MIFlags) const {
1529 MachineFunction &MF = *MBB.getParent();
1532
1533 DebugLoc DL;
1534
1535 using RegAndKill = std::pair<unsigned, bool>;
1536
// i is a cursor into CSI that counts down; each outer iteration emits one
// push instruction for the group of registers gathered by the inner loop.
1538 unsigned i = CSI.size();
1539 while (i != 0) {
1540 unsigned LastReg = 0;
1541 for (; i != 0; --i) {
1542 Register Reg = CSI[i-1].getReg();
1543 if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1544
1545 // D-registers in the aligned area DPRCS2 are NOT spilled here.
1546 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1547 continue;
1548
1549 const MachineRegisterInfo &MRI = MF.getRegInfo();
1550 bool isLiveIn = MRI.isLiveIn(Reg);
1551 if (!isLiveIn && !MRI.isReserved(Reg))
1552 MBB.addLiveIn(Reg);
1553 // If NoGap is true, push consecutive registers and then leave the rest
1554 // for other instructions. e.g.
1555 // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
// Breaking out skips the --i, so this register starts the next group.
1556 if (NoGap && LastReg && LastReg != Reg-1)
1557 break;
1558 LastReg = Reg;
1559 // Do not set a kill flag on values that are also marked as live-in. This
1560 // happens with the @llvm.returnaddress intrinsic and with arguments
1561 // passed in callee saved registers.
1562 // Omitting the kill flags is conservatively correct even if the live-in
1563 // is not used after all.
1564 Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1565 }
1566
1567 if (Regs.empty())
1568 continue;
1569
// Register lists are emitted in ascending encoding order.
1570 llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1571 return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1572 });
1573
1574 if (Regs.size() > 1 || StrOpc== 0) {
1575 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1576 .addReg(ARM::SP)
1577 .setMIFlags(MIFlags)
// This inner index shadows the outer CSI cursor `i`; the outer value is
// left untouched.
1579 for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1580 MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1581 } else if (Regs.size() == 1) {
1582 BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1583 .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1584 .addReg(ARM::SP)
1585 .setMIFlags(MIFlags)
1586 .addImm(-4)
1588 }
1589 Regs.clear();
1590
1591 // Put any subsequent vpush instructions before this one: they will refer to
1592 // higher register numbers so need to be pushed first in order to preserve
1593 // monotonicity.
1594 if (MI != MBB.begin())
1595 --MI;
1596 }
1597}
1598
/// Emit the load instructions that reload the callee-saved registers in CSI
/// accepted by the predicate Func, inserting them before MI.
///
/// Registers are gathered by walking CSI from its last entry to its first.
/// Each gathered group is sorted by encoding value and emitted either as one
/// LdmOpc instruction (more than one register, or LdrOpc == 0) or as a single
/// LdrOpc load. With NoGap set, a group ends as soon as the register numbers
/// stop being consecutive. D-registers in the range
/// [D8, D8 + NumAlignedDPRCS2Regs) are skipped. When the conditions checked
/// below hold, LR is reloaded directly into PC and the return instruction at
/// MI is erased.
/// NOTE(review): several lines of this function (part of the parameter list,
/// the declarations of STI, TII, TRI, AFI and Regs, and a few builder
/// operands) are missing from this listing.
1599void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1602 unsigned LdmOpc, unsigned LdrOpc,
1603 bool isVarArg, bool NoGap,
1604 bool (*Func)(unsigned, bool),
1605 unsigned NumAlignedDPRCS2Regs) const {
1606 MachineFunction &MF = *MBB.getParent();
1610 bool hasPAC = AFI->shouldSignReturnAddress();
1611 DebugLoc DL;
1612 bool isTailCall = false;
1613 bool isInterrupt = false;
1614 bool isTrap = false;
1615 bool isCmseEntry = false;
// Classify the instruction at MI (if any) by opcode; these flags decide below
// whether the return may be folded into the pop.
1616 if (MBB.end() != MI) {
1617 DL = MI->getDebugLoc();
1618 unsigned RetOpcode = MI->getOpcode();
1619 isTailCall =
1620 (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||
1621 RetOpcode == ARM::TCRETURNrinotr12);
1622 isInterrupt =
1623 RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1624 isTrap =
1625 RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1626 RetOpcode == ARM::tTRAP;
1627 isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
1628 }
1629
// i is a cursor into CSI that counts down; each outer iteration emits one
// pop instruction for the group of registers gathered by the inner loop.
1631 unsigned i = CSI.size();
1632 while (i != 0) {
1633 unsigned LastReg = 0;
1634 bool DeleteRet = false;
1635 for (; i != 0; --i) {
1636 CalleeSavedInfo &Info = CSI[i-1];
1637 Register Reg = Info.getReg();
1638 if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1639
1640 // The aligned reloads from area DPRCS2 are not inserted here.
1641 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1642 continue;
// NOTE(review): the last line of this condition (listing line 1646) is
// missing from this listing.
1643 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1644 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
1645 STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1647 Reg = ARM::PC;
1648 // Fold the return instruction into the LDM.
1649 DeleteRet = true;
1650 LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1651 }
1652
1653 // If NoGap is true, pop consecutive registers and then leave the rest
1654 // for other instructions. e.g.
1655 // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
// Breaking out skips the --i, so this register starts the next group.
1656 if (NoGap && LastReg && LastReg != Reg-1)
1657 break;
1658
1659 LastReg = Reg;
1660 Regs.push_back(Reg);
1661 }
1662
1663 if (Regs.empty())
1664 continue;
1665
// Register lists are emitted in ascending encoding order.
1666 llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1667 return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1668 });
1669
1670 if (Regs.size() > 1 || LdrOpc == 0) {
1671 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1672 .addReg(ARM::SP)
1675 for (unsigned Reg : Regs)
1676 MIB.addReg(Reg, getDefRegState(true));
// When the return was folded into the load-multiple, carry over the implicit
// operands of the old return instruction before erasing it.
1677 if (DeleteRet) {
1678 if (MI != MBB.end()) {
1679 MIB.copyImplicitOps(*MI);
1680 MI->eraseFromParent();
1681 }
1682 }
1683 MI = MIB;
1684 } else if (Regs.size() == 1) {
1685 // If we adjusted the reg to PC from LR above, switch it back here. We
1686 // only do that for LDM.
1687 if (Regs[0] == ARM::PC)
1688 Regs[0] = ARM::LR;
1690 BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1691 .addReg(ARM::SP, RegState::Define)
1692 .addReg(ARM::SP)
1694 // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1695 // that refactoring is complete (eventually).
1696 if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1697 MIB.addReg(0);
1699 } else
1700 MIB.addImm(4);
1701 MIB.add(predOps(ARMCC::AL));
1702 }
1703 Regs.clear();
1704
1705 // Put any subsequent vpop instructions after this one: they will refer to
1706 // higher register numbers so need to be popped afterwards.
1707 if (MI != MBB.end())
1708 ++MI;
1709 }
1710}
1711
1712/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1713/// starting from d8. Also insert stack realignment code and leave the stack
1714/// pointer pointing to the d8 spill slot.
///
/// The emitted sequence is: compute the slot address in r4, align r4 with a
/// single instruction, copy r4 into sp, then store the registers using the
/// widest suitable vst1.64 forms followed by a vstr.64 for an odd last
/// register. This function also sets ShouldRestoreSPFromFP on the function
/// info.
/// NOTE(review): part of the parameter list and the declarations of AFI and
/// TII are missing from this listing.
1717 unsigned NumAlignedDPRCS2Regs,
1719 const TargetRegisterInfo *TRI) {
1720 MachineFunction &MF = *MBB.getParent();
1722 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1724 MachineFrameInfo &MFI = MF.getFrameInfo();
1725
1726 // Mark the D-register spill slots as properly aligned. Since MFI computes
1727 // stack slot layout backwards, this can actually mean that the d-reg stack
1728 // slot offsets can be wrong. The offset for d8 will always be correct.
1729 for (const CalleeSavedInfo &I : CSI) {
// DNum is unsigned, so registers numbered below D8 wrap around to a large
// value and are filtered out by the same comparison as registers past the
// aligned range.
1730 unsigned DNum = I.getReg() - ARM::D8;
1731 if (DNum > NumAlignedDPRCS2Regs - 1)
1732 continue;
1733 int FI = I.getFrameIdx();
1734 // The even-numbered registers will be 16-byte aligned, the odd-numbered
1735 // registers will be 8-byte aligned.
1736 MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
1737
1738 // The stack slot for D8 needs to be maximally aligned because this is
1739 // actually the point where we align the stack pointer. MachineFrameInfo
1740 // computes all offsets relative to the incoming stack pointer which is a
1741 // bit weird when realigning the stack. Any extra padding for this
1742 // over-alignment is not realized because the code inserted below adjusts
1743 // the stack pointer by numregs * 8 before aligning the stack pointer.
1744 if (DNum == 0)
1745 MFI.setObjectAlignment(FI, MFI.getMaxAlign());
1746 }
1747
1748 // Move the stack pointer to the d8 spill slot, and align it at the same
1749 // time. Leave the stack slot address in the scratch register r4.
1750 //
1751 // sub r4, sp, #numregs * 8
1752 // bic r4, r4, #align - 1
1753 // mov sp, r4
1754 //
1755 bool isThumb = AFI->isThumbFunction();
1756 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1757 AFI->setShouldRestoreSPFromFP(true);
1758
1759 // sub r4, sp, #numregs * 8
1760 // The immediate is <= 64, so it doesn't need any special encoding.
1761 unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1762 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1763 .addReg(ARM::SP)
1764 .addImm(8 * NumAlignedDPRCS2Regs)
1766 .add(condCodeOp());
1767
1768 Align MaxAlign = MF.getFrameInfo().getMaxAlign();
1769 // We must set parameter MustBeSingleInstruction to true, since
1770 // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1771 // stack alignment. Luckily, this can always be done since all ARM
1772 // architecture versions that support Neon also support the BFC
1773 // instruction.
1774 emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1775
1776 // mov sp, r4
1777 // The stack pointer must be adjusted before spilling anything, otherwise
1778 // the stack slots could be clobbered by an interrupt handler.
1779 // Leave r4 live, it is used below.
1780 Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1781 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1782 .addReg(ARM::R4)
// Only the ARM-mode opcode gets the extra condCodeOp operand here.
1784 if (!isThumb)
1785 MIB.add(condCodeOp());
1786
1787 // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1788 // r4 holds the stack slot address.
1789 unsigned NextReg = ARM::D8;
1790
1791 // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1792 // The writeback is only needed when emitting two vst1.64 instructions.
1793 if (NumAlignedDPRCS2Regs >= 6) {
1794 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1795 &ARM::QQPRRegClass);
1796 MBB.addLiveIn(SupReg);
1797 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1798 .addReg(ARM::R4, RegState::Kill)
1799 .addImm(16)
1800 .addReg(NextReg)
1803 NextReg += 4;
1804 NumAlignedDPRCS2Regs -= 4;
1805 }
1806
1807 // We won't modify r4 beyond this point. It currently points to the next
1808 // register to be spilled.
1809 unsigned R4BaseReg = NextReg;
1810
1811 // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1812 if (NumAlignedDPRCS2Regs >= 4) {
1813 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1814 &ARM::QQPRRegClass);
1815 MBB.addLiveIn(SupReg);
1816 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1817 .addReg(ARM::R4)
1818 .addImm(16)
1819 .addReg(NextReg)
1822 NextReg += 4;
1823 NumAlignedDPRCS2Regs -= 4;
1824 }
1825
1826 // 16-byte aligned vst1.64 with 2 d-regs.
1827 if (NumAlignedDPRCS2Regs >= 2) {
1828 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1829 &ARM::QPRRegClass);
1830 MBB.addLiveIn(SupReg);
1831 BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1832 .addReg(ARM::R4)
1833 .addImm(16)
1834 .addReg(SupReg)
1836 NextReg += 2;
1837 NumAlignedDPRCS2Regs -= 2;
1838 }
1839
1840 // Finally, use a vanilla vstr.64 for the odd last register.
1841 if (NumAlignedDPRCS2Regs) {
1842 MBB.addLiveIn(NextReg);
1843 // vstr.64 uses addrmode5 which has an offset scale of 4.
// Each d-register occupies 8 bytes, so the register distance from the one r4
// points at is multiplied by 2 to express it in 4-byte units.
1844 BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1845 .addReg(NextReg)
1846 .addReg(ARM::R4)
1847 .addImm((NextReg - R4BaseReg) * 2)
1849 }
1850
1851 // The last spill instruction inserted should kill the scratch register r4.
1852 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1853}
1854
1855/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1856/// iterator to the following instruction.
///
/// The three alignment instructions are skipped first, then one, two or three
/// spill instructions depending on NumAlignedDPRCS2Regs. The asserts check
/// that each skipped instruction may store and that the last one kills r4.
/// NOTE(review): the first lines of the signature are missing from this
/// listing.
1859 unsigned NumAlignedDPRCS2Regs) {
1860 // sub r4, sp, #numregs * 8
1861 // bic r4, r4, #align - 1
1862 // mov sp, r4
1863 ++MI; ++MI; ++MI;
1864 assert(MI->mayStore() && "Expecting spill instruction");
1865
1866 // The switch cases below all fall through.
// Number of spill instructions to step over, following the sequence emitted
// by emitAlignedDPRCS2Spills:
//   7 registers         -> 3 instructions (4 + 2 + 1)
//   3, 5 or 6 registers -> 2 instructions (the default case)
//   1, 2 or 4 registers -> 1 instruction
1867 switch(NumAlignedDPRCS2Regs) {
1868 case 7:
1869 ++MI;
1870 assert(MI->mayStore() && "Expecting spill instruction");
1871 [[fallthrough]];
1872 default:
1873 ++MI;
1874 assert(MI->mayStore() && "Expecting spill instruction");
1875 [[fallthrough]];
1876 case 1:
1877 case 2:
1878 case 4:
// MI is now on the last spill instruction, which must carry the r4 kill flag.
1879 assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
1880 ++MI;
1881 }
1882 return MI;
1883}
1884
1885/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1886/// starting from d8. These instructions are assumed to execute while the
1887/// stack is still aligned, unlike the code inserted by emitPopInst.
///
/// The address of the d8 spill slot is first materialized into r4 through a
/// frame-index operand, then the registers are loaded using the widest
/// suitable vld1.64 forms followed by a vldr.64 for an odd last register.
/// NOTE(review): part of the parameter list and the declarations of AFI and
/// TII are missing from this listing.
1890 unsigned NumAlignedDPRCS2Regs,
1892 const TargetRegisterInfo *TRI) {
1893 MachineFunction &MF = *MBB.getParent();
1895 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1897
1898 // Find the frame index assigned to d8.
// If CSI has no entry for d8, D8SpillFI keeps its initial value of 0.
1899 int D8SpillFI = 0;
1900 for (const CalleeSavedInfo &I : CSI)
1901 if (I.getReg() == ARM::D8) {
1902 D8SpillFI = I.getFrameIdx();
1903 break;
1904 }
1905
1906 // Materialize the address of the d8 spill slot into the scratch register r4.
1907 // This can be fairly complicated if the stack frame is large, so just use
1908 // the normal frame index elimination mechanism to do it. This code runs as
1909 // the initial part of the epilog where the stack and base pointers haven't
1910 // been changed yet.
1911 bool isThumb = AFI->isThumbFunction();
1912 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1913
1914 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1915 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1916 .addFrameIndex(D8SpillFI)
1917 .addImm(0)
1919 .add(condCodeOp());
1920
1921 // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1922 unsigned NextReg = ARM::D8;
1923
1924 // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1925 if (NumAlignedDPRCS2Regs >= 6) {
1926 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1927 &ARM::QQPRRegClass);
1928 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1929 .addReg(ARM::R4, RegState::Define)
1930 .addReg(ARM::R4, RegState::Kill)
1931 .addImm(16)
1934 NextReg += 4;
1935 NumAlignedDPRCS2Regs -= 4;
1936 }
1937
1938 // We won't modify r4 beyond this point. It currently points to the next
1939 // register to be reloaded.
1940 unsigned R4BaseReg = NextReg;
1941
1942 // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1943 if (NumAlignedDPRCS2Regs >= 4) {
1944 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1945 &ARM::QQPRRegClass);
1946 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1947 .addReg(ARM::R4)
1948 .addImm(16)
1951 NextReg += 4;
1952 NumAlignedDPRCS2Regs -= 4;
1953 }
1954
1955 // 16-byte aligned vld1.64 with 2 d-regs.
1956 if (NumAlignedDPRCS2Regs >= 2) {
1957 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1958 &ARM::QPRRegClass);
1959 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1960 .addReg(ARM::R4)
1961 .addImm(16)
1963 NextReg += 2;
1964 NumAlignedDPRCS2Regs -= 2;
1965 }
1966
1967 // Finally, use a vanilla vldr.64 for the remaining odd register.
// The immediate is twice the register distance from the one r4 points at,
// matching the scaling used for the vstr.64 on the spill side.
1968 if (NumAlignedDPRCS2Regs)
1969 BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1970 .addReg(ARM::R4)
1971 .addImm(2 * (NextReg - R4BaseReg))
1973
1974 // The last load inserted kills the scratch register r4.
1975 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1976}
1977
// Emits the callee-saved register pushes for the registers listed in CSI.
// Returns false without emitting anything when CSI is empty, true otherwise.
// NOTE(review): the signature and the definitions of AFI/STI are not visible in
// this listing, and several call continuation lines below are missing.
1981 if (CSI.empty())
1982 return false;
1983
1984 MachineFunction &MF = *MBB.getParent();
1986
// Pick the Thumb2 or ARM form of the multi-register and single-register push.
1987 unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1988 unsigned PushOneOpc = AFI->isThumbFunction() ?
1989 ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1990 unsigned FltOpc = ARM::VSTMDDB_UPD;
1991 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1992 // Compute PAC in R12.
1993 if (AFI->shouldSignReturnAddress()) {
1994 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
1996 }
1997 // Save the non-secure floating point context.
// Only done when FPCXTNS appears in CSI; the store pre-decrements SP by 4.
1998 if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
1999 return C.getReg() == ARM::FPCXTNS;
2000 })) {
2001 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
2002 ARM::SP)
2003 .addReg(ARM::SP)
2004 .addImm(-4)
2006 }
// The order in which the register areas are pushed depends on whether the
// subtarget splits the frame-pointer push.
2007 if (STI.splitFramePointerPush(MF)) {
2008 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
2010 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
2011 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
2012 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
2014 } else {
2015 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
2017 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
2019 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
2020 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
2021 }
2022
2023 // The code above does not insert spill code for the aligned DPRCS2 registers.
2024 // The stack realignment code will be inserted between the push instructions
2025 // and these spills.
2026 if (NumAlignedDPRCS2Regs)
2027 emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2028
2029 return true;
2030}
2031
// Emits the callee-saved register reloads for the registers listed in CSI.
// Returns false without emitting anything when CSI is empty, true otherwise.
// NOTE(review): the signature and the definitions of AFI/STI are not visible in
// this listing, and some call continuation lines below are missing.
2035 if (CSI.empty())
2036 return false;
2037
2038 MachineFunction &MF = *MBB.getParent();
// isVarArg is true when the argument-register save area is non-empty.
2040 bool isVarArg = AFI->getArgRegsSaveSize() > 0;
2041 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2042
2043 // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
2044 // registers. Do that here instead.
2045 if (NumAlignedDPRCS2Regs)
2046 emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2047
// Pick the Thumb2 or ARM form of the multi-register and single-register pop.
2048 unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
2049 unsigned LdrOpc =
2050 AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
2051 unsigned FltOpc = ARM::VLDMDIA_UPD;
2052 if (STI.splitFramePointerPush(MF)) {
2053 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2055 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2056 NumAlignedDPRCS2Regs);
2057 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2059 } else {
// Without a split frame-pointer push the areas are popped 3, 2, 1.
2060 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2061 NumAlignedDPRCS2Regs);
2062 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2063 &isARMArea2Register, 0);
2064 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2065 &isARMArea1Register, 0);
2066 }
2067
2068 return true;
2069}
2070
/// Estimate the size of MF in bytes: the sum of the sizes TII reports for every
/// instruction, plus 4 bytes for each jump-table entry and 4 bytes for each
/// constant-pool entry.
// NOTE(review): every constant-pool entry is counted as 4 bytes regardless of
// its actual size -- confirm this is acceptable for the callers' purposes.
2071// FIXME: Make generic?
2073 const ARMBaseInstrInfo &TII) {
2074 unsigned FnSize = 0;
2075 for (auto &MBB : MF) {
2076 for (auto &MI : MBB)
2077 FnSize += TII.getInstSizeInBytes(MI);
2078 }
// Jump tables are optional; each listed target block is counted as 4 bytes.
2079 if (MF.getJumpTableInfo())
2080 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2081 FnSize += Table.MBBs.size() * 4;
2082 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2083 return FnSize;
2084}
2085
2086/// estimateRSStackSizeLimit - Look at each instruction that references stack
2087/// frames and return the stack size limit beyond which some of these
2088/// instructions will require a scratch register during their expansion later.
/// The limit starts at 4095 and is lowered to the smallest range implied by any
/// frame-index-using instruction; 0 is returned as soon as an addressing mode
/// 4 or 6 instruction references a frame index. HasNonSPFrameIndex is set to
/// true when a frame-index operand's register class does not contain SP; it is
/// never reset to false here.
// NOTE(review): the first line of the signature, the definition of TRI and
// several case labels in the switch are not visible in this listing.
2089// FIXME: Move to TII?
2091 const TargetFrameLowering *TFI,
2092 bool &HasNonSPFrameIndex) {
2093 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2094 const ARMBaseInstrInfo &TII =
2095 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
// Default: the largest offset representable in 12 bits.
2097 unsigned Limit = (1 << 12) - 1;
2098 for (auto &MBB : MF) {
2099 for (auto &MI : MBB) {
2100 if (MI.isDebugInstr())
2101 continue;
// Scan the operands for a frame index; only the first one found is examined.
2102 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2103 if (!MI.getOperand(i).isFI())
2104 continue;
2105
2106 // When using ADDri to get the address of a stack object, 255 is the
2107 // largest offset guaranteed to fit in the immediate offset.
2108 if (MI.getOpcode() == ARM::ADDri) {
2109 Limit = std::min(Limit, (1U << 8) - 1);
2110 break;
2111 }
2112 // t2ADDri will not require an extra register, it can reuse the
2113 // destination.
2114 if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
2115 break;
2116
2117 const MCInstrDesc &MCID = MI.getDesc();
2118 const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
2119 if (RegClass && !RegClass->contains(ARM::SP))
2120 HasNonSPFrameIndex = true;
2121
2122 // Otherwise check the addressing mode.
2123 switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
2125 case ARMII::AddrMode2:
2126 // Default 12 bit limit.
2127 break;
2128 case ARMII::AddrMode3:
2130 Limit = std::min(Limit, (1U << 8) - 1);
2131 break;
2133 Limit = std::min(Limit, ((1U << 8) - 1) * 2);
2134 break;
2135 case ARMII::AddrMode5:
2138 Limit = std::min(Limit, ((1U << 8) - 1) * 4);
2139 break;
2141 // i12 supports only positive offset so these will be converted to
2142 // i8 opcodes. See llvm::rewriteT2FrameIndex.
2143 if (TFI->hasFP(MF) && AFI->hasStackFrame())
2144 Limit = std::min(Limit, (1U << 8) - 1);
2145 break;
2146 case ARMII::AddrMode4:
2147 case ARMII::AddrMode6:
2148 // Addressing modes 4 & 6 (load/store) instructions can't encode an
2149 // immediate offset for stack references.
2150 return 0;
2152 Limit = std::min(Limit, ((1U << 7) - 1) * 1);
2153 break;
2155 Limit = std::min(Limit, ((1U << 7) - 1) * 2);
2156 break;
2158 Limit = std::min(Limit, ((1U << 7) - 1) * 4);
2159 break;
2160 default:
2161 llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
2162 }
2163 break; // At most one FI per instruction
2164 }
2165 }
2166 }
2167
2168 return Limit;
2169}
2170
2171// In functions that realign the stack, it can be an advantage to spill the
2172// callee-saved vector registers after realigning the stack. The vst1 and vld1
2173// instructions take alignment hints that can improve performance.
//
// This routine first resets the recorded count of aligned DPRCS2 registers to
// 0, then bails out on any of the checks below. If none applies, it records how
// many contiguous D-registers starting at d8 are marked in SavedRegs (at most
// 8, at least 2) and additionally marks r4 in SavedRegs as a scratch register.
// NOTE(review): the signature line and the conditions of three of the early
// returns are not visible in this listing.
2174static void
2176 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2178 return;
2179
2180 // Naked functions don't spill callee-saved registers.
2181 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
2182 return;
2183
2184 // We are planning to use NEON instructions vst1 / vld1.
2185 if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2186 return;
2187
2188 // Don't bother if the default stack alignment is sufficiently high.
2190 return;
2191
2192 // Aligned spills require stack realignment.
2193 if (!static_cast<const ARMBaseRegisterInfo *>(
2195 return;
2196
2197 // We always spill contiguous d-registers starting from d8. Count how many
2198 // need spilling. The register allocator will almost always use the
2199 // callee-saved registers in order, but it can happen that there are holes in
2200 // the range. Registers above the hole will be spilled to the standard DPRCS
2201 // area.
2202 unsigned NumSpills = 0;
// Stop at the first register in the run that is not marked in SavedRegs.
2203 for (; NumSpills < 8; ++NumSpills)
2204 if (!SavedRegs.test(ARM::D8 + NumSpills))
2205 break;
2206
2207 // Don't do this for just one d-register. It's not worth it.
2208 if (NumSpills < 2)
2209 return;
2210
2211 // Spill the first NumSpills D-registers after realigning the stack.
2212 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2213
2214 // A scratch register is required for the vst1 / vld1 instructions.
2215 SavedRegs.set(ARM::R4);
2216}
2217
// Returns false in the two situations described below and true otherwise.
// NOTE(review): the signature and the second half of both conditions are not
// visible in this listing.
2219 // For CMSE entry functions, we want to save the FPCXT_NS immediately
2220 // upon function entry (resp. restore it immediately before return)
2221 if (STI.hasV8_1MMainlineOps() &&
2223 return false;
2224
2225 // We are disabling shrinkwrapping for now when PAC is enabled, as
2226 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2227 // generated. A follow-up patch will fix this in a more performant manner.
2229 true /* SpillsLR */))
2230 return false;
2231
2232 return true;
2233}
2234
2236 const MachineFunction &MF) const {
// True only when the subtarget asks for an AAPCS frame chain and this function
// has a frame pointer.
2237 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2238 return Subtarget.createAAPCSFrameChain() && hasFP(MF);
2239}
2240
2241// Thumb1 may require a spill when storing to a frame index through FP (or any
2242// access with execute-only), for cases where FP is a high register (R11). This
2243// scans the function for cases where this may happen.
//
// Returns true as soon as one qualifying instruction has a frame-index operand
// whose frame register, as reported by TFI, is a high GPR other than SP.
// Always returns false for functions that are not Thumb1-only.
2245 const TargetFrameLowering &TFI) {
2246 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2247 if (!AFI->isThumb1OnlyFunction())
2248 return false;
2249
2250 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
2251 for (const auto &MBB : MF)
2252 for (const auto &MI : MBB)
// With execute-only every instruction is examined, not just the two stores.
2253 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
2254 STI.genExecuteOnly())
2255 for (const auto &Op : MI.operands())
2256 if (Op.isFI()) {
2257 Register Reg;
// Only the frame register is of interest; the returned offset is ignored.
2258 TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
2259 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2260 return true;
2261 }
2262 return false;
2263}
2264
2266 BitVector &SavedRegs,
2267 RegScavenger *RS) const {
// Decides which additional callee-saved registers get marked in SavedRegs,
// whether the function is flagged as having a stack frame, and whether
// emergency spill slots are created for register scavenging. On exit the
// function info records whether LR ended up in SavedRegs.
// NOTE(review): the first line of the signature and the definitions of AFI,
// MRI, TRI and STI (plus a few other lines) are not visible in this listing.
2269 // This tells PEI to spill the FP as if it is any other callee-save register
2270 // to take advantage of the eliminateFrameIndex machinery. This also ensures it
2271 // is spilled in the order specified by getCalleeSavedRegs() to make it easier
2272 // to combine multiple loads / stores.
2273 bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));
2274 bool CS1Spilled = false;
2275 bool LRSpilled = false;
2276 unsigned NumGPRSpills = 0;
2277 unsigned NumFPRSpills = 0;
2278 SmallVector<unsigned, 4> UnspilledCS1GPRs;
2279 SmallVector<unsigned, 4> UnspilledCS2GPRs;
2280 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
2282 const ARMBaseInstrInfo &TII =
2283 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2285 MachineFrameInfo &MFI = MF.getFrameInfo();
2288 (void)TRI; // Silence unused warning in non-assert builds.
2289 Register FramePtr = RegInfo->getFrameRegister(MF);
2290
2291 // Spill R4 if Thumb2 function requires stack realignment - it will be used as
2292 // scratch register. Also spill R4 if Thumb2 function has varsized objects,
2293 // since it's not always possible to restore sp from fp in a single
2294 // instruction.
2295 // FIXME: It will be better just to find spare register here.
2296 if (AFI->isThumb2Function() &&
2297 (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
2298 SavedRegs.set(ARM::R4);
2299
2300 // If a stack probe will be emitted, spill R4 and LR, since they are
2301 // clobbered by the stack probe call.
2302 // This estimate should be a safe, conservative estimate. The actual
2303 // stack probe is enabled based on the size of the local objects;
2304 // this estimate also includes the varargs store size.
2305 if (STI.isTargetWindows() &&
2306 WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
2307 SavedRegs.set(ARM::R4);
2308 SavedRegs.set(ARM::LR);
2309 }
2310
2311 if (AFI->isThumb1OnlyFunction()) {
2312 // Spill LR if Thumb1 function uses variable length argument lists.
2313 if (AFI->getArgRegsSaveSize() > 0)
2314 SavedRegs.set(ARM::LR);
2315
2316 // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
2317 // requires stack alignment. We don't know for sure what the stack size
2318 // will be, but for this, an estimate is good enough. If anything
2319 // changes it, it'll be a spill, which implies we've used all the registers
2320 // and so R4 is already used, so not marking it here will be OK.
2321 // FIXME: It will be better just to find spare register here.
2322 if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
2323 MFI.estimateStackSize(MF) > 508)
2324 SavedRegs.set(ARM::R4);
2325 }
2326
2327 // See if we can spill vector registers to aligned stack.
2328 checkNumAlignedDPRCS2Regs(MF, SavedRegs);
2329
2330 // Spill the BasePtr if it's used.
2331 if (RegInfo->hasBasePointer(MF))
2332 SavedRegs.set(RegInfo->getBaseRegister());
2333
2334 // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
2335 if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
2336 CanEliminateFrame = false;
2337
2338 // When return address signing is enabled R12 is treated as callee-saved.
2339 if (AFI->shouldSignReturnAddress())
2340 CanEliminateFrame = false;
2341
2342 // Don't spill FP if the frame can be eliminated. This is determined
2343 // by scanning the callee-save registers to see if any is modified.
// The same scan also counts GPR/FPR spills (FPR spills are counted in 4-byte
// units) and sorts the unspilled GPRs into the CS1 and CS2 lists.
2344 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2345 for (unsigned i = 0; CSRegs[i]; ++i) {
2346 unsigned Reg = CSRegs[i];
2347 bool Spilled = false;
2348 if (SavedRegs.test(Reg)) {
2349 Spilled = true;
2350 CanEliminateFrame = false;
2351 }
2352
2353 if (!ARM::GPRRegClass.contains(Reg)) {
2354 if (Spilled) {
2355 if (ARM::SPRRegClass.contains(Reg))
2356 NumFPRSpills++;
2357 else if (ARM::DPRRegClass.contains(Reg))
2358 NumFPRSpills += 2;
2359 else if (ARM::QPRRegClass.contains(Reg))
2360 NumFPRSpills += 4;
2361 }
2362 continue;
2363 }
2364
2365 if (Spilled) {
2366 NumGPRSpills++;
2367
// Without split push/pop every spilled GPR counts as a CS1 spill.
2368 if (!STI.splitFramePushPop(MF)) {
2369 if (Reg == ARM::LR)
2370 LRSpilled = true;
2371 CS1Spilled = true;
2372 continue;
2373 }
2374
2375 // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
2376 switch (Reg) {
2377 case ARM::LR:
2378 LRSpilled = true;
2379 [[fallthrough]];
2380 case ARM::R0: case ARM::R1:
2381 case ARM::R2: case ARM::R3:
2382 case ARM::R4: case ARM::R5:
2383 case ARM::R6: case ARM::R7:
2384 CS1Spilled = true;
2385 break;
2386 default:
2387 break;
2388 }
2389 } else {
2390 if (!STI.splitFramePushPop(MF)) {
2391 UnspilledCS1GPRs.push_back(Reg);
2392 continue;
2393 }
2394
2395 switch (Reg) {
2396 case ARM::R0: case ARM::R1:
2397 case ARM::R2: case ARM::R3:
2398 case ARM::R4: case ARM::R5:
2399 case ARM::R6: case ARM::R7:
2400 case ARM::LR:
2401 UnspilledCS1GPRs.push_back(Reg);
2402 break;
2403 default:
2404 UnspilledCS2GPRs.push_back(Reg);
2405 break;
2406 }
2407 }
2408 }
2409
2410 bool ForceLRSpill = false;
2411 if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
2412 unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
2413 // Force LR to be spilled if the Thumb function size is >= 2048 bytes. This
2414 // enables use of BL to implement far jump.
2415 if (FnSize >= (1 << 11)) {
2416 CanEliminateFrame = false;
2417 ForceLRSpill = true;
2418 }
2419 }
2420
2421 // If any of the stack slot references may be out of range of an immediate
2422 // offset, make sure a register (or a spill slot) is available for the
2423 // register scavenger. Note that if we're indexing off the frame pointer, the
2424 // effective stack size is 4 bytes larger since the FP points to the stack
2425 // slot of the previous FP. Also, if we have variable sized objects in the
2426 // function, stack slot references will often be negative, and some of
2427 // our instructions are positive-offset only, so conservatively consider
2428 // that case to want a spill slot (or register) as well. Similarly, if
2429 // the function adjusts the stack pointer during execution and the
2430 // adjustments aren't already part of our stack size estimate, our offset
2431 // calculations may be off, so be conservative.
2432 // FIXME: We could add logic to be more precise about negative offsets
2433 // and which instructions will need a scratch register for them. Is it
2434 // worth the effort and added fragility?
2435 unsigned EstimatedStackSize =
2436 MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
2437
2438 // Determine biggest (positive) SP offset in MachineFrameInfo.
// Only objects with negative frame indices are visited by this loop.
2439 int MaxFixedOffset = 0;
2440 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
2441 int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
2442 MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
2443 }
2444
2445 bool HasFP = hasFP(MF);
2446 if (HasFP) {
2447 if (AFI->hasStackFrame())
2448 EstimatedStackSize += 4;
2449 } else {
2450 // If FP is not used, SP will be used to access arguments, so count the
2451 // size of arguments into the estimation.
2452 EstimatedStackSize += MaxFixedOffset;
2453 }
2454 EstimatedStackSize += 16; // For possible paddings.
2455
2456 unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
2457 bool HasNonSPFrameIndex = false;
2458 if (AFI->isThumb1OnlyFunction()) {
2459 // For Thumb1, don't bother to iterate over the function. The only
2460 // instruction that requires an emergency spill slot is a store to a
2461 // frame index.
2462 //
2463 // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
2464 // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
2465 // a 5-bit unsigned immediate.
2466 //
2467 // We could try to check if the function actually contains a tSTRspi
2468 // that might need the spill slot, but it's not really important.
2469 // Functions with VLAs or extremely large call frames are rare, and
2470 // if a function is allocating more than 1KB of stack, an extra 4-byte
2471 // slot probably isn't relevant.
2472 //
2473 // A special case is the scenario where r11 is used as FP, where accesses
2474 // to a frame index will require its value to be moved into a low reg.
2475 // This is handled later on, once we are able to determine if we have any
2476 // fp-relative accesses.
2477 if (RegInfo->hasBasePointer(MF))
2478 EstimatedRSStackSizeLimit = (1U << 5) * 4;
2479 else
2480 EstimatedRSStackSizeLimit = (1U << 8) * 4;
2481 EstimatedRSFixedSizeLimit = (1U << 5) * 4;
2482 } else {
2483 EstimatedRSStackSizeLimit =
2484 estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
2485 EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
2486 }
2487 // Final estimate of whether sp or bp-relative accesses might require
2488 // scavenging.
2489 bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
2490
2491 // If the stack pointer moves and we don't have a base pointer, the
2492 // estimate logic doesn't work. The actual offsets might be larger when
2493 // we're constructing a call frame, or we might need to use negative
2494 // offsets from fp.
2495 bool HasMovingSP = MFI.hasVarSizedObjects() ||
2496 (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
2497 bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
2498
2499 // If we have a frame pointer, we assume arguments will be accessed
2500 // relative to the frame pointer. Check whether fp-relative accesses to
2501 // arguments require scavenging.
2502 //
2503 // We could do slightly better on Thumb1; in some cases, an sp-relative
2504 // offset would be legal even though an fp-relative offset is not.
2505 int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
2506 bool HasLargeArgumentList =
2507 HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
2508
2509 bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
2510 HasLargeArgumentList || HasNonSPFrameIndex;
2511 LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
2512 << "; EstimatedStack: " << EstimatedStackSize
2513 << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
2514 << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
// Everything below runs only when the function needs a stack frame.
2515 if (BigFrameOffsets ||
2516 !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
2517 AFI->setHasStackFrame(true);
2518
2519 if (HasFP) {
2520 SavedRegs.set(FramePtr);
2521 // If the frame pointer is required by the ABI, also spill LR so that we
2522 // emit a complete frame record.
2523 if ((requiresAAPCSFrameRecord(MF) ||
2525 !LRSpilled) {
2526 SavedRegs.set(ARM::LR);
2527 LRSpilled = true;
2528 NumGPRSpills++;
2529 auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
2530 if (LRPos != UnspilledCS1GPRs.end())
2531 UnspilledCS1GPRs.erase(LRPos);
2532 }
2533 auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
2534 if (FPPos != UnspilledCS1GPRs.end())
2535 UnspilledCS1GPRs.erase(FPPos);
2536 NumGPRSpills++;
2537 if (FramePtr == ARM::R7)
2538 CS1Spilled = true;
2539 }
2540
2541 // This is the number of extra spills inserted for callee-save GPRs which
2542 // would not otherwise be used by the function. When greater than zero it
2543 // guarantees that it is possible to scavenge a register to hold the
2544 // address of a stack slot. On Thumb1, the register must be a valid operand
2545 // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
2546 // or lr.
2547 //
2548 // If we don't insert a spill, we instead allocate an emergency spill
2549 // slot, which can be used by scavenging to spill an arbitrary register.
2550 //
2551 // We currently don't try to figure out whether any specific instruction
2552 // requires scavenging an additional register.
2553 unsigned NumExtraCSSpill = 0;
2554
2555 if (AFI->isThumb1OnlyFunction()) {
2556 // For Thumb1-only targets, we need some low registers when we save and
2557 // restore the high registers (which aren't allocatable, but could be
2558 // used by inline assembly) because the push/pop instructions can not
2559 // access high registers. If necessary, we might need to push more low
2560 // registers to ensure that there is at least one free that can be used
2561 // for the saving & restoring, and preferably we should ensure that as
2562 // many as are needed are available so that fewer push/pop instructions
2563 // are required.
2564
2565 // Low registers which are not currently pushed, but could be (r4-r7).
2566 SmallVector<unsigned, 4> AvailableRegs;
2567
2568 // Unused argument registers (r0-r3) can be clobbered in the prologue for
2569 // free.
2570 int EntryRegDeficit = 0;
2571 for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
2572 if (!MF.getRegInfo().isLiveIn(Reg)) {
2573 --EntryRegDeficit;
2575 << printReg(Reg, TRI)
2576 << " is unused argument register, EntryRegDeficit = "
2577 << EntryRegDeficit << "\n");
2578 }
2579 }
2580
2581 // Unused return registers can be clobbered in the epilogue for free.
2582 int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
2584 << " return regs used, ExitRegDeficit = "
2585 << ExitRegDeficit << "\n");
2586
// The overall deficit starts from whichever of entry and exit is worse.
2587 int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
2588 LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
2589
2590 // r4-r6 can be used in the prologue if they are pushed by the first push
2591 // instruction.
2592 for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
2593 if (SavedRegs.test(Reg)) {
2594 --RegDeficit;
2595 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2596 << " is saved low register, RegDeficit = "
2597 << RegDeficit << "\n");
2598 } else {
2599 AvailableRegs.push_back(Reg);
2600 LLVM_DEBUG(
2601 dbgs()
2602 << printReg(Reg, TRI)
2603 << " is non-saved low register, adding to AvailableRegs\n");
2604 }
2605 }
2606
2607 // r7 can be used if it is not being used as the frame pointer.
2608 if (!HasFP || FramePtr != ARM::R7) {
2609 if (SavedRegs.test(ARM::R7)) {
2610 --RegDeficit;
2611 LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
2612 << RegDeficit << "\n");
2613 } else {
2614 AvailableRegs.push_back(ARM::R7);
2615 LLVM_DEBUG(
2616 dbgs()
2617 << "%r7 is non-saved low register, adding to AvailableRegs\n");
2618 }
2619 }
2620
2621 // Each of r8-r11 needs to be copied to a low register, then pushed.
2622 for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
2623 if (SavedRegs.test(Reg)) {
2624 ++RegDeficit;
2625 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2626 << " is saved high register, RegDeficit = "
2627 << RegDeficit << "\n");
2628 }
2629 }
2630
2631 // LR can only be used by PUSH, not POP, and can't be used at all if the
2632 // llvm.returnaddress intrinsic is used. This is only worth doing if we
2633 // are more limited at function entry than exit.
2634 if ((EntryRegDeficit > ExitRegDeficit) &&
2635 !(MF.getRegInfo().isLiveIn(ARM::LR) &&
2637 if (SavedRegs.test(ARM::LR)) {
2638 --RegDeficit;
2639 LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
2640 << RegDeficit << "\n");
2641 } else {
2642 AvailableRegs.push_back(ARM::LR);
2643 LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
2644 }
2645 }
2646
2647 // If there are more high registers that need pushing than low registers
2648 // available, push some more low registers so that we can use fewer push
2649 // instructions. This might not reduce RegDeficit all the way to zero,
2650 // because we can only guarantee that r4-r6 are available, but r8-r11 may
2651 // need saving.
2652 LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2653 for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2654 unsigned Reg = AvailableRegs.pop_back_val();
2655 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2656 << " to make up reg deficit\n");
2657 SavedRegs.set(Reg);
2658 NumGPRSpills++;
2659 CS1Spilled = true;
2660 assert(!MRI.isReserved(Reg) && "Should not be reserved");
2661 if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
2662 NumExtraCSSpill++;
// NOTE(review): the result of find() is passed to erase() unchecked; this
// relies on Reg always being present in UnspilledCS1GPRs -- confirm.
2663 UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
2664 if (Reg == ARM::LR)
2665 LRSpilled = true;
2666 }
2667 LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2668 << "\n");
2669 }
2670
2671 // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2672 // restore LR in that case.
2673 bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2674
2675 // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
2676 // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
2677 if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2678 SavedRegs.set(ARM::LR);
2679 NumGPRSpills++;
2681 LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2682 if (LRPos != UnspilledCS1GPRs.end())
2683 UnspilledCS1GPRs.erase(LRPos);
2684
// LR is now marked in SavedRegs, so the forced spill at the end is not needed.
2685 ForceLRSpill = false;
2686 if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2687 !AFI->isThumb1OnlyFunction())
2688 NumExtraCSSpill++;
2689 }
2690
2691 // If stack and double are 8-byte aligned and we are spilling an odd number
2692 // of GPRs, spill one extra callee save GPR so we won't have to pad between
2693 // the integer and double callee save areas.
2694 LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2695 const Align TargetAlign = getStackAlign();
2696 if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
2697 if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
2698 for (unsigned Reg : UnspilledCS1GPRs) {
2699 // Don't spill high register if the function is thumb. In the case of
2700 // Windows on ARM, accept R11 (frame pointer)
2701 if (!AFI->isThumbFunction() ||
2702 (STI.isTargetWindows() && Reg == ARM::R11) ||
2703 isARMLowRegister(Reg) ||
2704 (Reg == ARM::LR && !ExpensiveLRRestore)) {
2705 SavedRegs.set(Reg);
2706 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2707 << " to make up alignment\n");
2708 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
2709 !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
2710 NumExtraCSSpill++;
2711 break;
2712 }
2713 }
2714 } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
2715 unsigned Reg = UnspilledCS2GPRs.front();
2716 SavedRegs.set(Reg);
2717 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2718 << " to make up alignment\n");
2719 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
2720 NumExtraCSSpill++;
2721 }
2722 }
2723
2724 // Estimate if we might need to scavenge registers at some point in order
2725 // to materialize a stack offset. If so, either spill one additional
2726 // callee-saved register or reserve a special spill slot to facilitate
2727 // register scavenging. Thumb1 needs a spill slot for stack pointer
2728 // adjustments and for frame index accesses when FP is high register,
2729 // even when the frame itself is small.
2730 unsigned RegsNeeded = 0;
2731 if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {
2732 RegsNeeded++;
2733 // With thumb1 execute-only we may need an additional register for saving
2734 // and restoring the CPSR.
2735 if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
2736 RegsNeeded++;
2737 }
2738
2739 if (RegsNeeded > NumExtraCSSpill) {
2740 // If any non-reserved CS register isn't spilled, just spill one or two
2741 // extra. That should take care of it!
2742 unsigned NumExtras = TargetAlign.value() / 4;
2744 while (NumExtras && !UnspilledCS1GPRs.empty()) {
2745 unsigned Reg = UnspilledCS1GPRs.pop_back_val();
2746 if (!MRI.isReserved(Reg) &&
2747 (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
2748 Extras.push_back(Reg);
2749 NumExtras--;
2750 }
2751 }
2752 // For non-Thumb1 functions, also check for hi-reg CS registers
2753 if (!AFI->isThumb1OnlyFunction()) {
2754 while (NumExtras && !UnspilledCS2GPRs.empty()) {
2755 unsigned Reg = UnspilledCS2GPRs.pop_back_val();
2756 if (!MRI.isReserved(Reg)) {
2757 Extras.push_back(Reg);
2758 NumExtras--;
2759 }
2760 }
2761 }
// The extras are only committed if the full count was found.
2762 if (NumExtras == 0) {
2763 for (unsigned Reg : Extras) {
2764 SavedRegs.set(Reg);
2765 if (!MRI.isPhysRegUsed(Reg))
2766 NumExtraCSSpill++;
2767 }
2768 }
// Cover any remaining shortfall with emergency spill slots; this is skipped
// entirely when no RegScavenger was supplied.
2769 while ((RegsNeeded > NumExtraCSSpill) && RS) {
2770 // Reserve a slot closest to SP or frame pointer.
2771 LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
2772 const TargetRegisterClass &RC = ARM::GPRRegClass;
2773 unsigned Size = TRI->getSpillSize(RC);
2774 Align Alignment = TRI->getSpillAlign(RC);
2776 MFI.CreateStackObject(Size, Alignment, false));
2777 --RegsNeeded;
2778 }
2779 }
2780 }
2781
2782 if (ForceLRSpill)
2783 SavedRegs.set(ARM::LR);
2784 AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
2785}
2786
// Clears the Restored flag on the LR callee-saved entry when every return
// terminator in the function is one of LDMIA_RET, t2LDMIA_RET or tPOP_RET.
// Does nothing if the callee-saved info is not valid yet.
// NOTE(review): the signature line is not visible in this listing.
2788 MachineFrameInfo &MFI = MF.getFrameInfo();
2789 if (!MFI.isCalleeSavedInfoValid())
2790 return;
2791
2792 // Check if all terminators do not implicitly use LR. Then we can 'restore' LR
2793 // into PC so it is not live out of the return block: Clear the Restored bit
2794 // in that case.
2795 for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
2796 if (Info.getReg() != ARM::LR)
2797 continue;
// Non-return terminators are accepted unconditionally by the predicate.
2798 if (all_of(MF, [](const MachineBasicBlock &MBB) {
2799 return all_of(MBB.terminators(), [](const MachineInstr &Term) {
2800 return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
2801 Term.getOpcode() == ARM::t2LDMIA_RET ||
2802 Term.getOpcode() == ARM::tPOP_RET;
2803 });
2804 })) {
2805 Info.setRestored(false);
2806 break;
2807 }
2808 }
2809}
2810
2812 MachineFunction &MF, RegScavenger *RS) const {
// Re-evaluates whether LR still needs to be marked as restored.
// NOTE(review): one statement before this call is not visible in this listing.
2814 updateLRRestored(MF);
2815}
2816
2818 BitVector &SavedRegs) const {
// NOTE(review): the first line of the signature and the first statement of the
// body are not visible in this listing.
2820
2821 // If we have the "returned" parameter attribute which guarantees that we
2822 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
2823 // record that fact for IPRA.
2824 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2825 if (AFI->getPreservesR0())
2826 SavedRegs.set(ARM::R0);
2827}
2828
2831 std::vector<CalleeSavedInfo> &CSI) const {
2832 // For CMSE entry functions, handle floating-point context as if it was a
2833 // callee-saved register.
2834 if (STI.hasV8_1MMainlineOps() &&
2836 CSI.emplace_back(ARM::FPCXTNS);
2837 CSI.back().setRestored(false);
2838 }
2839
2840 // For functions that sign their return address, the return address PAC is
2841 // computed in R12 upon function entry. Treat R12 as a callee-saved register
2842 // in this case.
2843 const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
2844 if (AFI.shouldSignReturnAddress()) {
2845 // The order of registers must match the order in which we push them,
2846 // because PEI assigns frame indices in that order. When compiling for
2847 // return address signing and authentication, we use split push, therefore
2848 // the order we want is:
2849 // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2850 CSI.insert(find_if(CSI,
2851 [=](const auto &CS) {
2852 Register Reg = CS.getReg();
2853 return Reg == ARM::R10 || Reg == ARM::R11 ||
2854 Reg == ARM::R8 || Reg == ARM::R9 ||
2855 ARM::DPRRegClass.contains(Reg);
2856 }),
2857 CalleeSavedInfo(ARM::R12));
2858 }
2859
2860 return false;
2861}
2862
2865 static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
2866 NumEntries = std::size(FixedSpillOffsets);
2867 return FixedSpillOffsets;
2868}
2869
2870MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2873 const ARMBaseInstrInfo &TII =
2874 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2876 bool isARM = !AFI->isThumbFunction();
2877 DebugLoc dl = I->getDebugLoc();
2878 unsigned Opc = I->getOpcode();
2879 bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
2880 unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2881
2882 assert(!AFI->isThumb1OnlyFunction() &&
2883 "This eliminateCallFramePseudoInstr does not support Thumb1!");
2884
2885 int PIdx = I->findFirstPredOperandIdx();
2886 ARMCC::CondCodes Pred = (PIdx == -1)
2887 ? ARMCC::AL
2888 : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
2889 unsigned PredReg = TII.getFramePred(*I);
2890
2891 if (!hasReservedCallFrame(MF)) {
2892 // Bail early if the callee is expected to do the adjustment.
2893 if (IsDestroy && CalleePopAmount != -1U)
2894 return MBB.erase(I);
2895
2896 // If we have alloca, convert as follows:
2897 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
2898 // ADJCALLSTACKUP -> add, sp, sp, amount
2899 unsigned Amount = TII.getFrameSize(*I);
2900 if (Amount != 0) {
2901 // We need to keep the stack aligned properly. To do this, we round the
2902 // amount of space needed for the outgoing arguments up to the next
2903 // alignment boundary.
2904 Amount = alignSPAdjust(Amount);
2905
2906 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2907 emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2908 Pred, PredReg);
2909 } else {
2910 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2911 emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2912 Pred, PredReg);
2913 }
2914 }
2915 } else if (CalleePopAmount != -1U) {
2916 // If the calling convention demands that the callee pops arguments from the
2917 // stack, we want to add it back if we have a reserved call frame.
2918 emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
2919 MachineInstr::NoFlags, Pred, PredReg);
2920 }
2921 return MBB.erase(I);
2922}
2923
2924/// Get the minimum constant for ARM that is greater than or equal to the
2925/// argument. In ARM, constants can have any value that can be produced by
2926/// rotating an 8-bit value to the right by an even number of bits within a
2927/// 32-bit word.
2929 unsigned Shifted = 0;
2930
2931 if (Value == 0)
2932 return 0;
2933
2934 while (!(Value & 0xC0000000)) {
2935 Value = Value << 2;
2936 Shifted += 2;
2937 }
2938
2939 bool Carry = (Value & 0x00FFFFFF);
2940 Value = ((Value & 0xFF000000) >> 24) + Carry;
2941
2942 if (Value & 0x0000100)
2943 Value = Value & 0x000001FC;
2944
2945 if (Shifted > 24)
2946 Value = Value >> (Shifted - 24);
2947 else
2948 Value = Value << (24 - Shifted);
2949
2950 return Value;
2951}
2952
2953// The stack limit in the TCB is set to this many bytes above the actual
2954// stack limit.
2956
2957// Adjust the function prologue to enable split stacks. This currently only
2958// supports android and linux.
2959//
2960// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
2961// must be well defined in order to allow for consistent implementations of the
2962// __morestack helper function. The ABI is also not a normal ABI in that it
2963// doesn't follow the normal calling conventions because this allows the
2964// prologue of each function to be optimized further.
2965//
2966// Currently, the ABI looks like (when calling __morestack)
2967//
2968// * r4 holds the minimum stack size requested for this function call
2969// * r5 holds the stack size of the arguments to the function
2970// * the beginning of the function is 3 instructions after the call to
2971// __morestack
2972//
2973// Implementations of __morestack should use r4 to allocate a new stack, r5 to
2974// place the arguments on to the new stack, and the 3-instruction knowledge to
2975// jump directly to the body of the function when working on the new stack.
2976//
2977// An old (and possibly no longer compatible) implementation of __morestack for
2978// ARM can be found at [1].
2979//
2980// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
2982 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2983 unsigned Opcode;
2984 unsigned CFIIndex;
2985 const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2986 bool Thumb = ST->isThumb();
2987 bool Thumb2 = ST->isThumb2();
2988
2989 // Sadly, this currently doesn't support varargs or platforms other than
2990 // android/linux. Note that thumb1/thumb2 are supported on android/linux.
2991 if (MF.getFunction().isVarArg())
2992 report_fatal_error("Segmented stacks do not support vararg functions.");
2993 if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2994 report_fatal_error("Segmented stacks not supported on this platform.");
2995
2996 MachineFrameInfo &MFI = MF.getFrameInfo();
2997 MCContext &Context = MF.getContext();
2998 const MCRegisterInfo *MRI = Context.getRegisterInfo();
2999 const ARMBaseInstrInfo &TII =
3000 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
3002 DebugLoc DL;
3003
3004 if (!MFI.needsSplitStackProlog())
3005 return;
3006
3007 uint64_t StackSize = MFI.getStackSize();
3008
3009 // Use R4 and R5 as scratch registers.
3010 // We save R4 and R5 before use and restore them before leaving the function.
3011 unsigned ScratchReg0 = ARM::R4;
3012 unsigned ScratchReg1 = ARM::R5;
3013 unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
3014 uint64_t AlignedStackSize;
3015
3016 MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
3017 MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
3021
3022 // Grab everything that reaches PrologueMBB to update their liveness as well.
3023 SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
3025 WalkList.push_back(&PrologueMBB);
3026
3027 do {
3028 MachineBasicBlock *CurMBB = WalkList.pop_back_val();
3029 for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
3030 if (BeforePrologueRegion.insert(PredBB).second)
3031 WalkList.push_back(PredBB);
3032 }
3033 } while (!WalkList.empty());
3034
3035 // The order in that list is important.
3036 // The blocks will all be inserted before PrologueMBB using that order.
3037 // Therefore the block that should appear first in the CFG should appear
3038 // first in the list.
3039 MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
3040 PostStackMBB};
3041
3042 for (MachineBasicBlock *B : AddedBlocks)
3043 BeforePrologueRegion.insert(B);
3044
3045 for (const auto &LI : PrologueMBB.liveins()) {
3046 for (MachineBasicBlock *PredBB : BeforePrologueRegion)
3047 PredBB->addLiveIn(LI);
3048 }
3049
3050 // Remove the newly added blocks from the list, since we know
3051 // we do not have to do the following updates for them.
3052 for (MachineBasicBlock *B : AddedBlocks) {
3053 BeforePrologueRegion.erase(B);
3054 MF.insert(PrologueMBB.getIterator(), B);
3055 }
3056
3057 for (MachineBasicBlock *MBB : BeforePrologueRegion) {
3058 // Make sure the LiveIns are still sorted and unique.
3060 // Replace the edges to PrologueMBB by edges to the sequences
3061 // we are about to add, but only update for immediate predecessors.
3062 if (MBB->isSuccessor(&PrologueMBB))
3063 MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
3064 }
3065
3066 // The required stack size that is aligned to ARM constant criterion.
3067 AlignedStackSize = alignToARMConstant(StackSize);
3068
3069 // When the frame size is less than 256 we just compare the stack
3070 // boundary directly to the value of the stack pointer, per gcc.
3071 bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
3072
3073 // We will use two of the callee save registers as scratch registers so we
3074 // need to save those registers onto the stack.
3075 // We will use SR0 to hold stack limit and SR1 to hold the stack size
3076 // requested and arguments for __morestack().
3077 // SR0: Scratch Register #0
3078 // SR1: Scratch Register #1
3079 // push {SR0, SR1}
3080 if (Thumb) {
3081 BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
3083 .addReg(ScratchReg0)
3084 .addReg(ScratchReg1);
3085 } else {
3086 BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
3087 .addReg(ARM::SP, RegState::Define)
3088 .addReg(ARM::SP)
3090 .addReg(ScratchReg0)
3091 .addReg(ScratchReg1);
3092 }
3093
3094 // Emit the relevant DWARF information about the change in stack pointer as
3095 // well as where to find both r4 and r5 (the callee-save registers)
3096 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3097 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
3098 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3099 .addCFIIndex(CFIIndex);
3101 nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
3102 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3103 .addCFIIndex(CFIIndex);
3105 nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
3106 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3107 .addCFIIndex(CFIIndex);
3108 }
3109
3110 // mov SR1, sp
3111 if (Thumb) {
3112 BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
3113 .addReg(ARM::SP)
3115 } else if (CompareStackPointer) {
3116 BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
3117 .addReg(ARM::SP)
3119 .add(condCodeOp());
3120 }
3121
3122 // sub SR1, sp, #StackSize
3123 if (!CompareStackPointer && Thumb) {
3124 if (AlignedStackSize < 256) {
3125 BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
3126 .add(condCodeOp())
3127 .addReg(ScratchReg1)
3128 .addImm(AlignedStackSize)
3130 } else {
3131 if (Thumb2 || ST->genExecuteOnly()) {
3132 BuildMI(McrMBB, DL, TII.get(MovOp), ScratchReg0)
3133 .addImm(AlignedStackSize);
3134 } else {
3135 auto MBBI = McrMBB->end();
3136 auto RegInfo = STI.getRegisterInfo();
3137 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3138 AlignedStackSize);
3139 }
3140 BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
3141 .add(condCodeOp())
3142 .addReg(ScratchReg1)
3143 .addReg(ScratchReg0)
3145 }
3146 } else if (!CompareStackPointer) {
3147 if (AlignedStackSize < 256) {
3148 BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
3149 .addReg(ARM::SP)
3150 .addImm(AlignedStackSize)
3152 .add(condCodeOp());
3153 } else {
3154 auto MBBI = McrMBB->end();
3155 auto RegInfo = STI.getRegisterInfo();
3156 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3157 AlignedStackSize);
3158 BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
3159 .addReg(ARM::SP)
3160 .addReg(ScratchReg0)
3162 .add(condCodeOp());
3163 }
3164 }
3165
3166 if (Thumb && ST->isThumb1Only()) {
3167 if (ST->genExecuteOnly()) {
3168 BuildMI(GetMBB, DL, TII.get(MovOp), ScratchReg0)
3169 .addExternalSymbol("__STACK_LIMIT");
3170 } else {
3171 unsigned PCLabelId = ARMFI->createPICLabelUId();
3173 MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
3175 unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));
3176
3177 // ldr SR0, [pc, offset(STACK_LIMIT)]
3178 BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
3181 }
3182
3183 // ldr SR0, [SR0]
3184 BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
3185 .addReg(ScratchReg0)
3186 .addImm(0)
3188 } else {
3189 // Get TLS base address from the coprocessor
3190 // mrc p15, #0, SR0, c13, c0, #3
3191 BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
3192 ScratchReg0)
3193 .addImm(15)
3194 .addImm(0)
3195 .addImm(13)
3196 .addImm(0)
3197 .addImm(3)
3199
3200 // Use the last tls slot on android and a private field of the TCB on linux.
3201 assert(ST->isTargetAndroid() || ST->isTargetLinux());
3202 unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
3203
3204 // Get the stack limit from the right offset
3205 // ldr SR0, [sr0, #4 * TlsOffset]
3206 BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
3207 ScratchReg0)
3208 .addReg(ScratchReg0)
3209 .addImm(4 * TlsOffset)
3211 }
3212
3213 // Compare stack limit with stack size requested.
3214 // cmp SR0, SR1
3215 Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
3216 BuildMI(GetMBB, DL, TII.get(Opcode))
3217 .addReg(ScratchReg0)
3218 .addReg(ScratchReg1)
3220
3221 // This jump is taken if StackLimit <= SP - stack required.
3222 Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
3223 BuildMI(GetMBB, DL, TII.get(Opcode))
3224 .addMBB(PostStackMBB)
3226 .addReg(ARM::CPSR);
3227
3228 // Calling __morestack(StackSize, Size of stack arguments).
3229 // __morestack knows that the stack size requested is in SR0(r4)
3230 // and the size of the stack arguments is in SR1(r5).
3231
3232 // Pass the first argument for __morestack in Scratch Register #0:
3233 // the amount of stack required.
3234 if (Thumb) {
3235 if (AlignedStackSize < 256) {
3236 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
3237 .add(condCodeOp())
3238 .addImm(AlignedStackSize)
3240 } else {
3241 if (Thumb2 || ST->genExecuteOnly()) {
3242 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg0)
3243 .addImm(AlignedStackSize);
3244 } else {
3245 auto MBBI = AllocMBB->end();
3246 auto RegInfo = STI.getRegisterInfo();
3247 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3248 AlignedStackSize);
3249 }
3250 }
3251 } else {
3252 if (AlignedStackSize < 256) {
3253 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
3254 .addImm(AlignedStackSize)
3256 .add(condCodeOp());
3257 } else {
3258 auto MBBI = AllocMBB->end();
3259 auto RegInfo = STI.getRegisterInfo();
3260 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3261 AlignedStackSize);
3262 }
3263 }
3264
3265 // Pass the second argument for __morestack in Scratch Register #1:
3266 // the amount of stack consumed to save function arguments.
3267 if (Thumb) {
3268 if (ARMFI->getArgumentStackSize() < 256) {
3269 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
3270 .add(condCodeOp())
3273 } else {
3274 if (Thumb2 || ST->genExecuteOnly()) {
3275 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg1)
3277 } else {
3278 auto MBBI = AllocMBB->end();
3279 auto RegInfo = STI.getRegisterInfo();
3280 RegInfo->emitLoadConstPool(
3281 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3283 }
3284 }
3285 } else {
3286 if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
3287 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
3290 .add(condCodeOp());
3291 } else {
3292 auto MBBI = AllocMBB->end();
3293 auto RegInfo = STI.getRegisterInfo();
3294 RegInfo->emitLoadConstPool(
3295 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3297 }
3298 }
3299
3300 // push {lr} - Save return address of this function.
3301 if (Thumb) {
3302 BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
3304 .addReg(ARM::LR);
3305 } else {
3306 BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
3307 .addReg(ARM::SP, RegState::Define)
3308 .addReg(ARM::SP)
3310 .addReg(ARM::LR);
3311 }
3312
3313 // Emit the DWARF info about the change in stack as well as where to find the
3314 // previous link register
3315 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3316 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
3317 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3318 .addCFIIndex(CFIIndex);
3320 nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
3321 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3322 .addCFIIndex(CFIIndex);
3323 }
3324
3325 // Call __morestack().
3326 if (Thumb) {
3327 BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
3329 .addExternalSymbol("__morestack");
3330 } else {
3331 BuildMI(AllocMBB, DL, TII.get(ARM::BL))
3332 .addExternalSymbol("__morestack");
3333 }
3334
3335 // pop {lr} - Restore return address of this original function.
3336 if (Thumb) {
3337 if (ST->isThumb1Only()) {
3338 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3340 .addReg(ScratchReg0);
3341 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
3342 .addReg(ScratchReg0)
3344 } else {
3345 BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
3346 .addReg(ARM::LR, RegState::Define)
3347 .addReg(ARM::SP, RegState::Define)
3348 .addReg(ARM::SP)
3349 .addImm(4)
3351 }
3352 } else {
3353 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3354 .addReg(ARM::SP, RegState::Define)
3355 .addReg(ARM::SP)
3357 .addReg(ARM::LR);
3358 }
3359
3360 // Restore SR0 and SR1 in case __morestack() was called.
3361 // __morestack() will skip the PostStackMBB block, so we need to restore the
3362 // scratch registers here.
3363 // pop {SR0, SR1}
3364 if (Thumb) {
3365 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3367 .addReg(ScratchReg0)
3368 .addReg(ScratchReg1);
3369 } else {
3370 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3371 .addReg(ARM::SP, RegState::Define)
3372 .addReg(ARM::SP)
3374 .addReg(ScratchReg0)
3375 .addReg(ScratchReg1);
3376 }
3377
3378 // Update the CFA offset now that we've popped
3379 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3380 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
3381 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3382 .addCFIIndex(CFIIndex);
3383 }
3384
3385 // Return from this function.
3386 BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
3387
3388 // Restore SR0 and SR1 in case __morestack() was not called.
3389 // pop {SR0, SR1}
3390 if (Thumb) {
3391 BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
3393 .addReg(ScratchReg0)
3394 .addReg(ScratchReg1);
3395 } else {
3396 BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
3397 .addReg(ARM::SP, RegState::Define)
3398 .addReg(ARM::SP)
3400 .addReg(ScratchReg0)
3401 .addReg(ScratchReg1);
3402 }
3403
3404 // Update the CFA offset now that we've popped
3405 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3406 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
3407 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3408 .addCFIIndex(CFIIndex);
3409
3410 // Tell debuggers that r4 and r5 are now the same as they were in the
3411 // previous function, that they're the "Same Value".
3413 nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
3414 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3415 .addCFIIndex(CFIIndex);
3417 nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
3418 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3419 .addCFIIndex(CFIIndex);
3420 }
3421
3422 // Organizing MBB lists
3423 PostStackMBB->addSuccessor(&PrologueMBB);
3424
3425 AllocMBB->addSuccessor(PostStackMBB);
3426
3427 GetMBB->addSuccessor(PostStackMBB);
3428 GetMBB->addSuccessor(AllocMBB);
3429
3430 McrMBB->addSuccessor(GetMBB);
3431
3432 PrevStackMBB->addSuccessor(McrMBB);
3433
3434#ifdef EXPENSIVE_CHECKS
3435 MF.verify();
3436#endif
3437}
unsigned const MachineRegisterInfo * MRI
static int64_t getArgumentStackToRestore(MachineFunction &MF, MachineBasicBlock &MBB)
Returns how much of the incoming argument stack area (in bytes) we should clean up in an epilogue.
static bool needsWinCFI(const MachineFunction &MF)
static unsigned estimateRSStackSizeLimit(MachineFunction &MF)
Look at each instruction that references stack frames and return the stack size limit beyond which so...
unsigned RegSize
static bool isThumb(const MCSubtargetInfo &STI)
static MachineBasicBlock::iterator skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs)
Skip past the code inserted by emitAlignedDPRCS2Spills, and return an iterator to the following instr...
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, const TargetInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const unsigned Reg, const Align Alignment, const bool MustBeSingleInstruction)
Emit an instruction sequence that will align the address in register Reg by zero-ing out the lower bi...
static uint32_t alignToARMConstant(uint32_t Value)
Get the minimum constant for ARM that is greater than or equal to the argument.
static void checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs)
static void insertSEHRange(MachineBasicBlock &MBB, MachineBasicBlock::iterator Start, const MachineBasicBlock::iterator &End, const ARMBaseInstrInfo &TII, unsigned MIFlags)
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF, const ARMBaseInstrInfo &TII)
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI, const TargetInstrInfo &TII, unsigned Flags)
static bool canSpillOnFrameIndexAccess(const MachineFunction &MF, const TargetFrameLowering &TFI)
static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes)
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI, const MachineFunction &MF)
We need the offset of the frame pointer relative to other MachineFrameInfo offsets which are encoded ...
static MachineBasicBlock::iterator initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI)
static int sizeOfSPAdjustment(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
static cl::opt< bool > SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), cl::desc("Align ARM NEON spills in prolog and epilog"))
static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg, unsigned SrcReg, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
const char LLVMTargetMachineRef TM
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Value * RHS
Value * LHS
static const unsigned FramePtr
bool hasBasePointer(const MachineFunction &MF) const
virtual void emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred=ARMCC::AL, Register PredReg=Register(), unsigned MIFlags=MachineInstr::NoFlags) const
emitLoadConstPool - Emits a load from constpool to materialize the specified immediate.
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
bool cannotEliminateFrame(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
bool canRealignStack(const MachineFunction &MF) const override
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - Provide a base+offset reference to an FI slot for debug info.
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
ARMFrameLowering(const ARMSubtarget &sti)
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
bool keepFramePointer(const MachineFunction &MF) const override
Return true if the target wants to keep the frame pointer regardless of the function attribute "frame...
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool requiresAAPCSFrameRecord(const MachineFunction &MF) const
bool isFPReserved(const MachineFunction &MF) const
isFPReserved - Return true if the frame pointer register should be considered a reserved register on ...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override
Adjust the prologue to have the function use segmented stacks.
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override
getCalleeSavedSpillSlots - This method returns a pointer to an array of pairs, that contains an entry...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
int ResolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, int SPAdj) const
void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const override
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
const ARMSubtarget & STI
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
bool enableCalleeSaveSkip(const MachineFunction &MF) const override
Returns true if the target can safely skip saving callee-saved registers for noreturn nounwind functi...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
void setDPRCalleeSavedAreaSize(unsigned s)
unsigned getFPCXTSaveAreaSize() const
unsigned getGPRCalleeSavedArea1Size() const
unsigned getDPRCalleeSavedGapSize() const
void setGPRCalleeSavedArea2Size(unsigned s)
void setDPRCalleeSavedAreaOffset(unsigned o)
void setFramePtrSpillOffset(unsigned o)
unsigned getGPRCalleeSavedArea2Size() const
unsigned getNumAlignedDPRCS2Regs() const
void setGPRCalleeSavedArea1Size(unsigned s)
unsigned getArgumentStackToRestore() const
void setFPCXTSaveAreaSize(unsigned s)
unsigned getDPRCalleeSavedAreaSize() const
unsigned getFramePtrSpillOffset() const
unsigned getArgRegsSaveSize() const
void setGPRCalleeSavedArea2Offset(unsigned o)
void setGPRCalleeSavedArea1Offset(unsigned o)
void setDPRCalleeSavedGapSize(unsigned s)
unsigned getArgumentStackSize() const
unsigned getReturnRegsCount() const
bool useMovt() const
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:196
bool isTargetWindows() const
Definition: ARMSubtarget.h:308
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:208
bool splitFramePushPop(const MachineFunction &MF) const
Returns true if the frame setup is split into two separate pushes (first r0-r7,lr then r8-r11),...
Definition: ARMSubtarget.h:385
bool splitFramePointerPush(const MachineFunction &MF) const
bool isTargetELF() const
Definition: ARMSubtarget.h:311
const ARMFrameLowering * getFrameLowering() const override
Definition: ARMSubtarget.h:204
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:274
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:225
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:719
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:793
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:565
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:558
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:600
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:573
static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_same_value Current value of Register is the same as in the previous frame.
Definition: MCDwarf.h:647
Context object for machine code objects.
Definition: MCContext.h:83
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:414
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
iterator_range< livein_iterator > liveins() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Given a machine basic block that branched to 'Old', change the code and CFG so that it branches to 'N...
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool isReturnAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
int getStackProtectorIndex() const
Return the index for the stack protector object.
int64_t getOffsetAdjustment() const
Return the correction for frame offsets.
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
BitVector getPristineRegs(const MachineFunction &MF) const
Return a set of physical registers that are pristine.
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:361
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
typename SuperClass::iterator iterator
Definition: SmallVector.h:590
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
Information about stack frame layout on the target.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
Align getStackAlign() const
getStackAlign - This method returns the number of bytes to which the stack pointer must be aligne...
int alignSPAdjust(int SPAdj) const
alignSPAdjust - This method aligns the stack adjustment to the correct alignment.
virtual StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const
getFrameIndexReference - This method should return the base register and offset used to reference a f...
TargetInstrInfo - Interface to description of machine instruction set.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool FramePointerIsReserved(const MachineFunction &MF) const
FramePointerIsReserved - This returns true if the frame pointer must always either point to a new fra...
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool hasStackRealignment(const MachineFunction &MF) const
True if stack realignment is required and still possible.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
LLVM Value Representation.
Definition: Value.h:74
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isSplitFPArea1Register(unsigned Reg, bool SplitFramePushPop)
static bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
static bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop)
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
static bool isARMLowRegister(unsigned Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
static bool isARMArea1Register(unsigned Reg, bool SplitFramePushPop)
isARMArea1Register - Returns true if the register is a low register (r0-r7) or a stack/pc register th...
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materialize a des...
static bool isSplitFPArea2Register(unsigned Reg, bool SplitFramePushPop)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85