LLVM 23.0.0git
ARMFrameLowering.cpp
Go to the documentation of this file.
1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
13// This file contains the ARM implementation of TargetFrameLowering class.
14//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function so that the particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
25// | | Higher address
26// |-----------------------------------|
27// | |
28// | arguments passed on the stack |
29// | |
30// |-----------------------------------| <- sp
31// | | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) doesn't get created until in the
36// main function body, after the prologue is run. However, it's depicted here
37// for completeness.
38//
39// | | Higher address
40// |-----------------------------------|
41// | |
42// | arguments passed on the stack |
43// | |
44// |-----------------------------------| <- (sp at function entry)
45// | |
46// | varargs from registers |
47// | |
48// |-----------------------------------|
49// | |
50// | prev_lr |
51// | prev_fp |
52// | (a.k.a. "frame record") |
53// | |
54// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55// | |
56// | callee-saved gpr registers |
57// | |
58// |-----------------------------------|
59// | |
60// | callee-saved fp/simd regs |
61// | |
62// |-----------------------------------|
63// |.empty.space.to.make.part.below....|
64// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65// |.the.standard.8-byte.alignment.....| compile time; if present)
66// |-----------------------------------|
67// | |
68// | local variables of fixed size |
69// | including spill slots |
70// |-----------------------------------| <- base pointer (not defined by ABI,
71// |.variable-sized.local.variables....| LLVM chooses r6)
72// |.(VLAs)............................| (size of this area is unknown at
73// |...................................| compile time)
74// |-----------------------------------| <- sp
75// | | Lower address
76//
77//
78// To access the data in a frame, a constant offset from one of the pointers
79// (fp, bp, sp) must be computable at compile time. The size
80// of the areas with a dotted background cannot be computed at compile-time
81// if they are present, making it required to have all three of fp, bp and
82// sp to be set up to be able to access all contents in the frame areas,
83// assuming all of the frame areas are non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
115#include "ARMSubtarget.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCInstrDesc.h"
144#include "llvm/Support/CodeGen.h"
147#include "llvm/Support/Debug.h"
152#include <algorithm>
153#include <cassert>
154#include <cstddef>
155#include <cstdint>
156#include <iterator>
157#include <utility>
158#include <vector>
159
160#define DEBUG_TYPE "arm-frame-lowering"
161
162using namespace llvm;
163
164static cl::opt<bool>
165SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
166 cl::desc("Align ARM NEON spills in prolog and epilog"));
167
170 unsigned NumAlignedDPRCS2Regs);
171
181
182/// Get the spill area that Reg should be saved into in the prologue.
185 unsigned NumAlignedDPRCS2Regs,
187 // NoSplit:
188 // push {r0-r12, lr} GPRCS1
189 // vpush {d8-d15} DPRCS1
190 //
191 // SplitR7:
192 // push {r0-r7, lr} GPRCS1
193 // push {r8-r12} GPRCS2
194 // vpush {d8-d15} DPRCS1
195 //
196 // SplitR11WindowsSEH:
197 // push {r0-r10, r12} GPRCS1
198 // vpush {d8-d15} DPRCS1
199 // push {r11, lr} GPRCS3
200 //
201 // SplitR11AAPCSSignRA:
202 // push {r0-r10, r12} GPRCS1
203 // push {r11, lr} GPRCS2
204 // vpush {d8-d15} DPRCS1
205
206 // If FPCXTNS is spilled (for CMSE secure entry functions), it is always at
207 // the top of the stack frame.
208 // The DPRCS2 region is used for ABIs which only guarantee 4-byte alignment
209 // of SP. If used, it will be below the other save areas, after the stack has
210 // been re-aligned.
211
212 switch (Reg) {
213 default:
214 dbgs() << "Don't know where to spill " << printReg(Reg, RegInfo) << "\n";
215 llvm_unreachable("Don't know where to spill this register");
216 break;
217
218 case ARM::FPCXTNS:
219 return SpillArea::FPCXT;
220
221 case ARM::FPSCR:
222 case ARM::FPEXC:
223 return SpillArea::FPStatus;
224
225 case ARM::R0:
226 case ARM::R1:
227 case ARM::R2:
228 case ARM::R3:
229 case ARM::R4:
230 case ARM::R5:
231 case ARM::R6:
232 case ARM::R7:
233 return SpillArea::GPRCS1;
234
235 case ARM::R8:
236 case ARM::R9:
237 case ARM::R10:
238 if (Variation == ARMSubtarget::SplitR7)
239 return SpillArea::GPRCS2;
240 else
241 return SpillArea::GPRCS1;
242
243 case ARM::R11:
244 if (Variation == ARMSubtarget::SplitR7 ||
246 return SpillArea::GPRCS2;
247 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
248 return SpillArea::GPRCS3;
249
250 return SpillArea::GPRCS1;
251
252 case ARM::R12:
253 if (Variation == ARMSubtarget::SplitR7)
254 return SpillArea::GPRCS2;
255 else
256 return SpillArea::GPRCS1;
257
258 case ARM::LR:
259 if (Variation == ARMSubtarget::SplitR11AAPCSSignRA)
260 return SpillArea::GPRCS2;
261 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
262 return SpillArea::GPRCS3;
263
264 return SpillArea::GPRCS1;
265
266 case ARM::D0:
267 case ARM::D1:
268 case ARM::D2:
269 case ARM::D3:
270 case ARM::D4:
271 case ARM::D5:
272 case ARM::D6:
273 case ARM::D7:
274 return SpillArea::DPRCS1;
275
276 case ARM::D8:
277 case ARM::D9:
278 case ARM::D10:
279 case ARM::D11:
280 case ARM::D12:
281 case ARM::D13:
282 case ARM::D14:
283 case ARM::D15:
284 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
285 return SpillArea::DPRCS2;
286 else
287 return SpillArea::DPRCS1;
288
289 case ARM::D16:
290 case ARM::D17:
291 case ARM::D18:
292 case ARM::D19:
293 case ARM::D20:
294 case ARM::D21:
295 case ARM::D22:
296 case ARM::D23:
297 case ARM::D24:
298 case ARM::D25:
299 case ARM::D26:
300 case ARM::D27:
301 case ARM::D28:
302 case ARM::D29:
303 case ARM::D30:
304 case ARM::D31:
305 return SpillArea::DPRCS1;
306 }
307}
308
312
314 // iOS always has a FP for backtracking, force other targets to keep their FP
315 // when doing FastISel. The emitted code is currently superior, and in cases
316 // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
317 return MF.getSubtarget<ARMSubtarget>().useFastISel();
318}
319
320/// Returns true if the target can safely skip saving callee-saved registers
321/// for noreturn nounwind functions.
323 assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
324 MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
325 !MF.getFunction().hasFnAttribute(Attribute::UWTable));
326
327 // Frame pointer and link register are not treated as normal CSR, thus we
328 // can always skip CSR saves for nonreturning functions.
329 return true;
330}
331
332/// hasFPImpl - Return true if the specified function should have a dedicated
333/// frame pointer register. This is true if the function has variable sized
334/// allocas or if frame pointer elimination is disabled.
336 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
337 const MachineFrameInfo &MFI = MF.getFrameInfo();
338
339 // Check to see if the target wants to forcibly keep the frame pointer.
340 if (keepFramePointer(MF))
341 return true;
342
343 // ABI-required frame pointer.
345 return true;
346
347 // Frame pointer required for use within this function.
348 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
349 MFI.isFrameAddressTaken());
350}
351
352/// isFPReserved - Return true if the frame pointer register should be
353/// considered a reserved register on the scope of the specified function.
355 return hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF);
356}
357
358/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
359/// not required, we reserve argument space for call sites in the function
360/// immediately on entry to the current function. This eliminates the need for
361/// add/sub sp brackets around call sites. Returns true if the call frame is
362/// included as part of the stack frame.
364 const MachineFrameInfo &MFI = MF.getFrameInfo();
365 unsigned CFSize = MFI.getMaxCallFrameSize();
366 // It's not always a good idea to include the call frame as part of the
367 // stack frame. ARM (especially Thumb) has small immediate offset to
368 // address the stack frame. So a large call frame can cause poor codegen
369 // and may even make it impossible to scavenge a register.
370 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
371 return false;
372
373 return !MFI.hasVarSizedObjects();
374}
375
376/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
377/// call frame pseudos can be simplified. Unlike most targets, having a FP
378/// is not sufficient here since we still may reference some objects via SP
379/// even when FP is available in Thumb2 mode.
380bool
384
385// Returns how much of the incoming argument stack area we should clean up in an
386// epilogue. For the C calling convention this will be 0, for guaranteed tail
387// call conventions it can be positive (a normal return or a tail call to a
388// function that uses less stack space for arguments) or negative (for a tail
389// call to a function that needs more stack space than us for arguments).
392 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
393 bool IsTailCallReturn = false;
394 if (MBB.end() != MBBI) {
395 unsigned RetOpcode = MBBI->getOpcode();
396 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
397 RetOpcode == ARM::TCRETURNri ||
398 RetOpcode == ARM::TCRETURNrinotr12;
399 }
401
402 int ArgumentPopSize = 0;
403 if (IsTailCallReturn) {
404 MachineOperand &StackAdjust = MBBI->getOperand(1);
405
406 // For a tail-call in a callee-pops-arguments environment, some or all of
407 // the stack may actually be in use for the call's arguments, this is
408 // calculated during LowerCall and consumed here...
409 ArgumentPopSize = StackAdjust.getImm();
410 } else {
411 // ... otherwise the amount to pop is *all* of the argument space,
412 // conveniently stored in the MachineFunctionInfo by
413 // LowerFormalArguments. This will, of course, be zero for the C calling
414 // convention.
415 ArgumentPopSize = AFI->getArgumentStackToRestore();
416 }
417
418 return ArgumentPopSize;
419}
420
/// Return true when Windows CFI has to be produced for \p MF.
///
/// Both conditions must hold: the target's MCAsmInfo reports
/// usesWindowsCFI(), and the IR function reports needsUnwindTableEntry().
421static bool needsWinCFI(const MachineFunction &MF) {
422 const Function &F = MF.getFunction();
423 return MF.getTarget().getMCAsmInfo().usesWindowsCFI() &&
424 F.needsUnwindTableEntry();
425}
426
427// Given a load or a store instruction, generate an appropriate unwinding SEH
428// code on Windows.
430 const TargetInstrInfo &TII,
431 unsigned Flags) {
432 unsigned Opc = MBBI->getOpcode();
433 MachineBasicBlock *MBB = MBBI->getParent();
434 MachineFunction &MF = *MBB->getParent();
435 DebugLoc DL = MBBI->getDebugLoc();
437 const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
438 const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
439
440 Flags |= MachineInstr::NoMerge;
441
442 switch (Opc) {
443 default:
444 report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
445 break;
446 case ARM::t2ADDri: // add.w r11, sp, #xx
447 case ARM::t2ADDri12: // add.w r11, sp, #xx
448 case ARM::t2MOVTi16: // movt r4, #xx
449 case ARM::tBL: // bl __chkstk
450 // These are harmless if used for just setting up a frame pointer,
451 // but that frame pointer can't be relied upon for unwinding, unless
452 // set up with SEH_SaveSP.
453 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
454 .addImm(/*Wide=*/1)
455 .setMIFlags(Flags);
456 break;
457
458 case ARM::t2MOVi16: { // mov(w) r4, #xx
459 bool Wide = MBBI->getOperand(1).getImm() >= 256;
460 if (!Wide) {
461 MachineInstrBuilder NewInstr =
462 BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
463 NewInstr.add(MBBI->getOperand(0));
464 NewInstr.add(t1CondCodeOp(/*isDead=*/true));
465 for (MachineOperand &MO : llvm::drop_begin(MBBI->operands()))
466 NewInstr.add(MO);
467 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
468 MBB->erase(MBBI);
469 MBBI = NewMBBI;
470 }
471 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
472 break;
473 }
474
475 case ARM::tBLXr: // blx r12 (__chkstk)
476 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
477 .addImm(/*Wide=*/0)
478 .setMIFlags(Flags);
479 break;
480
481 case ARM::t2MOVi32imm: // movw+movt
482 // This pseudo instruction expands into two mov instructions. If the
483 // second operand is a symbol reference, this will stay as two wide
484 // instructions, movw+movt. If they're immediates, the first one can
485 // end up as a narrow mov though.
486 // As two SEH instructions are appended here, they won't get interleaved
487 // between the two final movw/movt instructions, but it doesn't make any
488 // practical difference.
489 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
490 .addImm(/*Wide=*/1)
491 .setMIFlags(Flags);
492 MBB->insertAfter(MBBI, MIB);
493 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
494 .addImm(/*Wide=*/1)
495 .setMIFlags(Flags);
496 break;
497
498 case ARM::t2STR_PRE:
499 if (MBBI->getOperand(0).getReg() == ARM::SP &&
500 MBBI->getOperand(2).getReg() == ARM::SP &&
501 MBBI->getOperand(3).getImm() == -4) {
502 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
503 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
504 .addImm(1ULL << Reg)
505 .addImm(/*Wide=*/1)
506 .setMIFlags(Flags);
507 } else {
508 report_fatal_error("No matching SEH Opcode for t2STR_PRE");
509 }
510 break;
511
512 case ARM::t2LDR_POST:
513 if (MBBI->getOperand(1).getReg() == ARM::SP &&
514 MBBI->getOperand(2).getReg() == ARM::SP &&
515 MBBI->getOperand(3).getImm() == 4) {
516 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
517 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
518 .addImm(1ULL << Reg)
519 .addImm(/*Wide=*/1)
520 .setMIFlags(Flags);
521 } else {
522 report_fatal_error("No matching SEH Opcode for t2LDR_POST");
523 }
524 break;
525
526 case ARM::t2LDMIA_RET:
527 case ARM::t2LDMIA_UPD:
528 case ARM::t2STMDB_UPD: {
529 unsigned Mask = 0;
530 bool Wide = false;
531 for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
532 const MachineOperand &MO = MBBI->getOperand(i);
533 if (!MO.isReg() || MO.isImplicit())
534 continue;
535 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
536 if (Reg == 15)
537 Reg = 14;
538 if (Reg >= 8 && Reg <= 13)
539 Wide = true;
540 else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
541 Wide = true;
542 Mask |= 1 << Reg;
543 }
544 if (!Wide) {
545 unsigned NewOpc;
546 switch (Opc) {
547 case ARM::t2LDMIA_RET:
548 NewOpc = ARM::tPOP_RET;
549 break;
550 case ARM::t2LDMIA_UPD:
551 NewOpc = ARM::tPOP;
552 break;
553 case ARM::t2STMDB_UPD:
554 NewOpc = ARM::tPUSH;
555 break;
556 default:
558 }
559 MachineInstrBuilder NewInstr =
560 BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
561 for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
562 NewInstr.add(MBBI->getOperand(i));
563 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
564 MBB->erase(MBBI);
565 MBBI = NewMBBI;
566 }
567 unsigned SEHOpc =
568 (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
569 MIB = BuildMI(MF, DL, TII.get(SEHOpc))
570 .addImm(Mask)
571 .addImm(Wide ? 1 : 0)
572 .setMIFlags(Flags);
573 break;
574 }
575 case ARM::VSTMDDB_UPD:
576 case ARM::VLDMDIA_UPD: {
577 int First = -1, Last = 0;
578 for (const MachineOperand &MO : llvm::drop_begin(MBBI->operands(), 4)) {
579 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
580 if (First == -1)
581 First = Reg;
582 Last = Reg;
583 }
584 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
585 .addImm(First)
586 .addImm(Last)
587 .setMIFlags(Flags);
588 break;
589 }
590 case ARM::tSUBspi:
591 case ARM::tADDspi:
592 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
593 .addImm(MBBI->getOperand(2).getImm() * 4)
594 .addImm(/*Wide=*/0)
595 .setMIFlags(Flags);
596 break;
597 case ARM::t2SUBspImm:
598 case ARM::t2SUBspImm12:
599 case ARM::t2ADDspImm:
600 case ARM::t2ADDspImm12:
601 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
602 .addImm(MBBI->getOperand(2).getImm())
603 .addImm(/*Wide=*/1)
604 .setMIFlags(Flags);
605 break;
606
607 case ARM::tMOVr:
608 if (MBBI->getOperand(1).getReg() == ARM::SP &&
609 (Flags & MachineInstr::FrameSetup)) {
610 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
611 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
612 .addImm(Reg)
613 .setMIFlags(Flags);
614 } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
615 (Flags & MachineInstr::FrameDestroy)) {
616 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
617 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
618 .addImm(Reg)
619 .setMIFlags(Flags);
620 } else {
621 report_fatal_error("No SEH Opcode for MOV");
622 }
623 break;
624
625 case ARM::tBX_RET:
626 case ARM::t2BXAUT_RET:
627 case ARM::CLEANUPRET:
628 case ARM::CATCHRET:
629 case ARM::TCRETURNri:
630 case ARM::TCRETURNrinotr12:
631 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
632 .addImm(/*Wide=*/0)
633 .setMIFlags(Flags);
634 break;
635
636 case ARM::TCRETURNdi:
637 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
638 .addImm(/*Wide=*/1)
639 .setMIFlags(Flags);
640 break;
641 }
642 return MBB->insertAfter(MBBI, MIB);
643}
644
647 if (MBBI == MBB.begin())
649 return std::prev(MBBI);
650}
651
655 const ARMBaseInstrInfo &TII, unsigned MIFlags) {
656 if (Start.isValid())
657 Start = std::next(Start);
658 else
659 Start = MBB.begin();
660
661 for (auto MI = Start; MI != End;) {
662 auto Next = std::next(MI);
663 // Check if this instruction already has got a SEH opcode added. In that
664 // case, don't do this generic mapping.
665 if (Next != End && isSEHInstruction(*Next)) {
666 MI = std::next(Next);
667 while (MI != End && isSEHInstruction(*MI))
668 ++MI;
669 continue;
670 }
671 insertSEH(MI, TII, MIFlags);
672 MI = Next;
673 }
674}
675
678 const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
679 unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
680 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
681 if (isARM)
682 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
683 Pred, PredReg, TII, MIFlags);
684 else
685 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
686 Pred, PredReg, TII, MIFlags);
687}
688
689static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
691 const ARMBaseInstrInfo &TII, int NumBytes,
692 unsigned MIFlags = MachineInstr::NoFlags,
694 unsigned PredReg = 0) {
695 emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
696 MIFlags, Pred, PredReg);
697}
698
700 int RegSize;
701 switch (MI.getOpcode()) {
702 case ARM::VSTMDDB_UPD:
703 RegSize = 8;
704 break;
705 case ARM::STMDB_UPD:
706 case ARM::t2STMDB_UPD:
707 RegSize = 4;
708 break;
709 case ARM::t2STR_PRE:
710 case ARM::STR_PRE_IMM:
711 return 4;
712 default:
713 llvm_unreachable("Unknown push or pop like instruction");
714 }
715
716 int count = 0;
717 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
718 // pred) so the list starts at 4.
719 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
720 count += RegSize;
721 return count;
722}
723
725 size_t StackSizeInBytes) {
726 const MachineFrameInfo &MFI = MF.getFrameInfo();
727 const Function &F = MF.getFunction();
728 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
729
730 StackProbeSize =
731 F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
732 return (StackSizeInBytes >= StackProbeSize) &&
733 !F.hasFnAttribute("no-stack-arg-probe");
734}
735
736namespace {
737
/// Ordered record of instructions that adjust SP, together with the number of
/// bytes each one moves SP by. The record is later walked by
/// emitDefCFAOffsets() to place def-CFA-offset directives.
///
/// NOTE(review): the declarations of the members `I` (used by InstInfo) and
/// `Insts` (the container used below) are not present in this listing;
/// presumably an iterator and a vector-like container of InstInfo - confirm
/// against the full file.
738struct StackAdjustingInsts {
// One recorded SP-adjusting instruction.
739 struct InstInfo {
// Number of bytes this instruction adjusts SP by.
741 unsigned SPAdjust;
// Flag consulted by emitDefCFAOffsets(): when a frame pointer is in use,
// only entries with this flag set get a def-CFA-offset.
742 bool BeforeFPSet;
743
744#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print the flag and adjustment to dbgs(), then dump the instruction.
745 void dump() {
746 dbgs() << " " << (BeforeFPSet ? "before-fp " : " ")
747 << "sp-adjust=" << SPAdjust;
748 I->dump();
749 }
750#endif
751 };
752
754
// Append a new record for instruction \p I adjusting SP by \p SPAdjust bytes.
755 void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
756 bool BeforeFPSet = false) {
757 InstInfo Info = {I, SPAdjust, BeforeFPSet};
758 Insts.push_back(Info);
759 }
760
// Add \p ExtraBytes to the adjustment of the already-recorded instruction
// \p I. Asserts that \p I has been recorded via addInst().
761 void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
762 auto Info =
763 llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
764 assert(Info != Insts.end() && "invalid sp adjusting instruction");
765 Info->SPAdjust += ExtraBytes;
766 }
767
// Walk the records in insertion order and, directly after each recorded
// instruction, build a def-CFA-offset carrying the running sum of the
// adjustments seen so far. The emitted CFI uses the FrameSetup flag.
768 void emitDefCFAOffsets(MachineBasicBlock &MBB, bool HasFP) {
769 CFIInstBuilder CFIBuilder(MBB, MBB.end(), MachineInstr::FrameSetup);
770 unsigned CFAOffset = 0;
771 for (auto &Info : Insts) {
// With a frame pointer, stop at the first entry not flagged BeforeFPSet;
// no further offsets are emitted (presumably because the CFA is then
// described relative to FP - confirm against the prologue code).
772 if (HasFP && !Info.BeforeFPSet)
773 return;
774
775 CFAOffset += Info.SPAdjust;
776 CFIBuilder.setInsertPoint(std::next(Info.I));
777 CFIBuilder.buildDefCFAOffset(CFAOffset);
778 }
779 }
780
781#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print every record to dbgs().
782 void dump() {
783 dbgs() << "StackAdjustingInsts:\n";
784 for (auto &Info : Insts)
785 Info.dump();
786 }
787#endif
788};
789
790} // end anonymous namespace
791
792/// Emit an instruction sequence that will align the address in
793/// register Reg by zero-ing out the lower bits. For versions of the
794/// architecture that support Neon, this must be done in a single
795/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
796/// single instruction. That function only gets called when optimizing
797/// spilling of D registers on a core with the Neon instruction set
798/// present.
800 const TargetInstrInfo &TII,
803 const DebugLoc &DL, const unsigned Reg,
804 const Align Alignment,
805 const bool MustBeSingleInstruction) {
806 const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
807 const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
808 const unsigned AlignMask = Alignment.value() - 1U;
809 const unsigned NrBitsToZero = Log2(Alignment);
810 assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
811 if (!AFI->isThumbFunction()) {
812 // if the BFC instruction is available, use that to zero the lower
813 // bits:
814 // bfc Reg, #0, log2(Alignment)
815 // otherwise use BIC, if the mask to zero the required number of bits
816 // can be encoded in the bic immediate field
817 // bic Reg, Reg, Alignment-1
818 // otherwise, emit
819 // lsr Reg, Reg, log2(Alignment)
820 // lsl Reg, Reg, log2(Alignment)
821 if (CanUseBFC) {
822 BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
824 .addImm(~AlignMask)
826 } else if (AlignMask <= 255) {
827 BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
829 .addImm(AlignMask)
831 .add(condCodeOp());
832 } else {
833 assert(!MustBeSingleInstruction &&
834 "Shouldn't call emitAligningInstructions demanding a single "
835 "instruction to be emitted for large stack alignment for a target "
836 "without BFC.");
837 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
839 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
841 .add(condCodeOp());
842 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
844 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
846 .add(condCodeOp());
847 }
848 } else {
849 // Since this is only reached for Thumb-2 targets, the BFC instruction
850 // should always be available.
851 assert(CanUseBFC);
852 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
854 .addImm(~AlignMask)
856 }
857}
858
859/// We need the offset of the frame pointer relative to other MachineFrameInfo
860/// offsets which are encoded relative to SP at function begin.
861/// See also emitPrologue() for how the FP is set up.
862/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
863/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
864/// this to produce a conservative estimate that we check in an assert() later.
///
/// The returned value is negative or zero: it is the negated sum of the FPCXT
/// save size (4 or 0), the argument-register save size reported by \p AFI and
/// an upper bound on the bytes of registers that may be stored above the
/// frame pointer slot. For Thumb1-only functions the bound is fixed at two
/// 4-byte registers and no FPCXT term is included.
///
/// NOTE(review): `PushPopSplit` is not declared in the lines visible in this
/// listing; presumably it is initialised from \p STI and \p MF in the elided
/// lines - confirm against the full file.
865static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
866 const MachineFunction &MF) {
869 // For Thumb1, push.w isn't available, so the first push will always push
870 // r7 and lr onto the stack first.
871 if (AFI.isThumb1OnlyFunction())
872 return -AFI.getArgRegsSaveSize() - (2 * 4);
873 // This is a conservative estimation: Assume the frame pointer being r7 and
874 // pc("r15") up to r8 getting spilled before (= 8 registers).
875 int MaxRegBytes = 8 * 4;
876 if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
877 // Here, r11 can be stored below all of r4-r15.
878 MaxRegBytes = 11 * 4;
879 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
880 // Here, r11 can be stored below all of r4-r15 plus d8-d15.
// 11 GPRs of 4 bytes each plus 8 D registers of 8 bytes each.
881 MaxRegBytes = 11 * 4 + 8 * 8;
882 }
// 4 bytes are accounted for FPCXT only when the subtarget has v8.1-M
// Mainline ops and the function is a CMSE non-secure entry function.
883 int FPCXTSaveSize =
884 (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
885 return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
886}
887
889 MachineBasicBlock &MBB) const {
891 MachineFrameInfo &MFI = MF.getFrameInfo();
893 const TargetMachine &TM = MF.getTarget();
894 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
895 const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
897 "This emitPrologue does not support Thumb1!");
898 bool isARM = !AFI->isThumbFunction();
899 Align Alignment = STI.getFrameLowering()->getStackAlign();
900 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
901 unsigned NumBytes = MFI.getStackSize();
902 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
903 int FPCXTSaveSize = 0;
904 bool NeedsWinCFI = needsWinCFI(MF);
906 STI.getPushPopSplitVariation(MF);
907
908 LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF.getName() << "\n");
909
910 // Debug location must be unknown since the first debug location is used
911 // to determine the end of the prologue.
912 DebugLoc dl;
913
914 Register FramePtr = RegInfo->getFrameRegister(MF);
915
916 // Determine the sizes of each callee-save spill areas and record which frame
917 // belongs to which callee-save spill areas.
918 unsigned GPRCS1Size = 0, GPRCS2Size = 0, FPStatusSize = 0,
919 DPRCS1Size = 0, GPRCS3Size = 0, DPRCS2Size = 0;
920 int FramePtrSpillFI = 0;
921 int D8SpillFI = 0;
922
923 // All calls are tail calls in GHC calling conv, and functions have no
924 // prologue/epilogue.
926 return;
927
928 StackAdjustingInsts DefCFAOffsetCandidates;
929 bool HasFP = hasFP(MF);
930
931 if (!AFI->hasStackFrame() &&
932 (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
933 if (NumBytes != 0) {
934 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
936 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
937 }
938 if (!NeedsWinCFI)
939 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
940 if (NeedsWinCFI && MBBI != MBB.begin()) {
942 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
944 MF.setHasWinCFI(true);
945 }
946 return;
947 }
948
949 // Determine spill area sizes, and some important frame indices.
950 SpillArea FramePtrSpillArea = SpillArea::GPRCS1;
951 bool BeforeFPPush = true;
952 for (const CalleeSavedInfo &I : CSI) {
953 MCRegister Reg = I.getReg();
954 int FI = I.getFrameIdx();
955
956 SpillArea Area = getSpillArea(Reg, PushPopSplit,
957 AFI->getNumAlignedDPRCS2Regs(), RegInfo);
958
959 if (Reg == FramePtr.asMCReg()) {
960 FramePtrSpillFI = FI;
961 FramePtrSpillArea = Area;
962 }
963 if (Reg == ARM::D8)
964 D8SpillFI = FI;
965
966 switch (Area) {
967 case SpillArea::FPCXT:
968 FPCXTSaveSize += 4;
969 break;
971 GPRCS1Size += 4;
972 break;
974 GPRCS2Size += 4;
975 break;
977 FPStatusSize += 4;
978 break;
980 DPRCS1Size += 8;
981 break;
983 GPRCS3Size += 4;
984 break;
986 DPRCS2Size += 8;
987 break;
988 }
989 }
990
991 MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push,
992 DPRCS1Push, GPRCS3Push;
993
994 // Move past the PAC computation.
995 if (AFI->shouldSignReturnAddress())
996 LastPush = MBBI++;
997
998 // Move past FPCXT area.
999 if (FPCXTSaveSize > 0) {
1000 LastPush = MBBI++;
1001 DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush);
1002 }
1003
1004 // Allocate the vararg register save area.
1005 if (ArgRegsSaveSize) {
1006 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
1008 LastPush = std::prev(MBBI);
1009 DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush);
1010 }
1011
1012 // Move past area 1.
1013 if (GPRCS1Size > 0) {
1014 GPRCS1Push = LastPush = MBBI++;
1015 DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush);
1016 if (FramePtrSpillArea == SpillArea::GPRCS1)
1017 BeforeFPPush = false;
1018 }
1019
1020 // Determine starting offsets of spill areas. These offsets are all positive
1021 // offsets from the bottom of the lowest-addressed callee-save area
1022 // (excluding DPRCS2, which is in the re-aligned stack region) to the bottom
1023 // of the spill area in question.
1024 unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
1025 unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
1026 unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
1027 unsigned FPStatusOffset = GPRCS2Offset - FPStatusSize;
1028
1029 Align DPRAlign = DPRCS1Size ? std::min(Align(8), Alignment) : Align(4);
1030 unsigned DPRGapSize = (ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1031 GPRCS2Size + FPStatusSize) %
1032 DPRAlign.value();
1033
1034 unsigned DPRCS1Offset = FPStatusOffset - DPRGapSize - DPRCS1Size;
1035
1036 if (HasFP) {
1037 // Offset from the CFA to the saved frame pointer, will be negative.
1038 [[maybe_unused]] int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
1039 LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
1040 << ", FPOffset: " << FPOffset << "\n");
1041 assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
1042 "Max FP estimation is wrong");
1043 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
1044 NumBytes);
1045 }
1046 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
1047 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
1048 AFI->setDPRCalleeSavedArea1Offset(DPRCS1Offset);
1049
1050 // Move past area 2.
1051 if (GPRCS2Size > 0) {
1053 GPRCS2Push = LastPush = MBBI++;
1054 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
1055 if (FramePtrSpillArea == SpillArea::GPRCS2)
1056 BeforeFPPush = false;
1057 }
1058
1059 // Move past FP status save area.
1060 if (FPStatusSize > 0) {
1061 while (MBBI != MBB.end()) {
1062 unsigned Opc = MBBI->getOpcode();
1063 if (Opc == ARM::VMRS || Opc == ARM::VMRS_FPEXC)
1064 MBBI++;
1065 else
1066 break;
1067 }
1068 LastPush = MBBI++;
1069 DefCFAOffsetCandidates.addInst(LastPush, FPStatusSize);
1070 }
1071
1072 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
1073 // .cfi_offset operations will reflect that.
1074 if (DPRGapSize) {
1075 assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
1076 if (LastPush != MBB.end() &&
1077 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
1078 DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
1079 else {
1080 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
1082 DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize, BeforeFPPush);
1083 }
1084 }
1085
1086 // Move past DPRCS1Size.
1087 if (DPRCS1Size > 0) {
1088 // Since vpush register list cannot have gaps, there may be multiple vpush
1089 // instructions in the prologue.
1090 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
1091 DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI),
1092 BeforeFPPush);
1093 DPRCS1Push = LastPush = MBBI++;
1094 }
1095 }
1096
1097 // Move past the aligned DPRCS2 area.
1098 if (DPRCS2Size > 0) {
1100 // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
1101 // leaves the stack pointer pointing to the DPRCS2 area.
1102 //
1103 // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
1104 NumBytes += MFI.getObjectOffset(D8SpillFI);
1105 } else
1106 NumBytes = DPRCS1Offset;
1107
1108 // Move GPRCS3, if using SplitR11WindowsSEH.
1109 if (GPRCS3Size > 0) {
1111 GPRCS3Push = LastPush = MBBI++;
1112 DefCFAOffsetCandidates.addInst(LastPush, GPRCS3Size, BeforeFPPush);
1113 if (FramePtrSpillArea == SpillArea::GPRCS3)
1114 BeforeFPPush = false;
1115 NumBytes -= GPRCS3Size;
1116 }
1117
1118 bool NeedsWinCFIStackAlloc = NeedsWinCFI;
1119 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH && HasFP)
1120 NeedsWinCFIStackAlloc = false;
1121
1122 if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
1123 uint32_t NumWords = NumBytes >> 2;
1124
1125 if (NumWords < 65536) {
1126 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
1127 .addImm(NumWords)
1130 } else {
1131 // Split into two instructions here, instead of using t2MOVi32imm,
1132 // to allow inserting accurate SEH instructions (including accurate
1133 // instruction size for each of them).
1134 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
1135 .addImm(NumWords & 0xffff)
1138 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
1139 .addReg(ARM::R4)
1140 .addImm(NumWords >> 16)
1143 }
1144
1145 const ARMTargetLowering *TLI = STI.getTargetLowering();
1146 RTLIB::LibcallImpl ChkStkLibcall = TLI->getLibcallImpl(RTLIB::STACK_PROBE);
1147 if (ChkStkLibcall == RTLIB::Unsupported)
1148 reportFatalUsageError("no available implementation of __chkstk");
1149 const char *ChkStk = TLI->getLibcallImplName(ChkStkLibcall).data();
1150
1151 switch (TM.getCodeModel()) {
1152 case CodeModel::Tiny:
1153 llvm_unreachable("Tiny code model not available on ARM.");
1154 case CodeModel::Small:
1155 case CodeModel::Medium:
1156 case CodeModel::Kernel:
1157 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
1159 .addExternalSymbol(ChkStk)
1160 .addReg(ARM::R4, RegState::Implicit)
1162 break;
1163 case CodeModel::Large:
1164 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
1165 .addExternalSymbol(ChkStk)
1167
1168 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
1170 .addReg(ARM::R12, RegState::Kill)
1171 .addReg(ARM::R4, RegState::Implicit)
1173 break;
1174 }
1175
1176 MachineInstrBuilder Instr, SEH;
1177 Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
1178 .addReg(ARM::SP, RegState::Kill)
1179 .addReg(ARM::R4, RegState::Kill)
1182 .add(condCodeOp());
1183 if (NeedsWinCFIStackAlloc) {
1184 SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
1185 .addImm(NumBytes)
1186 .addImm(/*Wide=*/1)
1188 MBB.insertAfter(Instr, SEH);
1189 }
1190 NumBytes = 0;
1191 }
1192
1193 if (NumBytes) {
1194 // Adjust SP after all the callee-save spills.
1195 if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
1196 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
1197 DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
1198 else {
1199 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
1201 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
1202 }
1203
1204 if (HasFP && isARM)
1205 // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
1206 // Note it's not safe to do this in Thumb2 mode because it would have
1207 // taken two instructions:
1208 // mov sp, r7
1209 // sub sp, #24
1210 // If an interrupt is taken between the two instructions, then sp is in
1211 // an inconsistent state (pointing to the middle of callee-saved area).
1212 // The interrupt handler can end up clobbering the registers.
1213 AFI->setShouldRestoreSPFromFP(true);
1214 }
1215
1216 // Set FP to point to the stack slot that contains the previous FP.
1217 // For iOS, FP is R7, which has now been stored in spill area 1.
1218 // Otherwise, if this is not iOS, all the callee-saved registers go
1219 // into spill area 1, including the FP in R11. In either case, it
1220 // is in area one and the adjustment needs to take place just after
1221 // that push.
1223 if (HasFP) {
1224 MachineBasicBlock::iterator FPPushInst;
1225 // Offset from SP immediately after the push which saved the FP to the FP
1226 // save slot.
1227 int64_t FPOffsetAfterPush;
1228 switch (FramePtrSpillArea) {
1229 case SpillArea::GPRCS1:
1230 FPPushInst = GPRCS1Push;
1231 FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1232 ArgRegsSaveSize + FPCXTSaveSize +
1233 sizeOfSPAdjustment(*FPPushInst);
1234 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
1235 << FPOffsetAfterPush << " after that push\n");
1236 break;
1237 case SpillArea::GPRCS2:
1238 FPPushInst = GPRCS2Push;
1239 FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1240 ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1241 sizeOfSPAdjustment(*FPPushInst);
1242 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
1243 << FPOffsetAfterPush << " after that push\n");
1244 break;
1245 case SpillArea::GPRCS3:
1246 FPPushInst = GPRCS3Push;
1247 FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1248 ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1249 FPStatusSize + GPRCS2Size + DPRCS1Size + DPRGapSize +
1250 sizeOfSPAdjustment(*FPPushInst);
1251 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS3, offset "
1252 << FPOffsetAfterPush << " after that push\n");
1253 break;
1254 default:
1255 llvm_unreachable("frame pointer in unknown spill area");
1256 break;
1257 }
1258 AfterPush = std::next(FPPushInst);
1259 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1260 assert(FPOffsetAfterPush == 0);
1261
1262 // Emit the MOV or ADD to set up the frame pointer register.
1263 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1264 FramePtr, ARM::SP, FPOffsetAfterPush,
1266
1267 if (!NeedsWinCFI) {
1268 // Emit DWARF info to find the CFA using the frame pointer from this
1269 // point onward.
1270 CFIInstBuilder CFIBuilder(MBB, AfterPush, MachineInstr::FrameSetup);
1271 if (FPOffsetAfterPush != 0)
1272 CFIBuilder.buildDefCFA(FramePtr, -MFI.getObjectOffset(FramePtrSpillFI));
1273 else
1274 CFIBuilder.buildDefCFARegister(FramePtr);
1275 }
1276 }
1277
1278 // Emit a SEH opcode indicating the prologue end. The rest of the prologue
1279 // instructions below don't need to be replayed to unwind the stack.
1280 if (NeedsWinCFI && MBBI != MBB.begin()) {
1282 if (HasFP && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1283 End = AfterPush;
1285 BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
1287 MF.setHasWinCFI(true);
1288 }
1289
1290 // Now that the prologue's actual instructions are finalised, we can insert
1291 // the necessary DWARF cf instructions to describe the situation. Start by
1292 // recording where each register ended up:
1293 if (!NeedsWinCFI) {
1294 for (const auto &Entry : reverse(CSI)) {
1295 MCRegister Reg = Entry.getReg();
1296 int FI = Entry.getFrameIdx();
1298 switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(),
1299 RegInfo)) {
1300 case SpillArea::GPRCS1:
1301 CFIPos = std::next(GPRCS1Push);
1302 break;
1303 case SpillArea::GPRCS2:
1304 CFIPos = std::next(GPRCS2Push);
1305 break;
1306 case SpillArea::DPRCS1:
1307 CFIPos = std::next(DPRCS1Push);
1308 break;
1309 case SpillArea::GPRCS3:
1310 CFIPos = std::next(GPRCS3Push);
1311 break;
1313 case SpillArea::FPCXT:
1314 case SpillArea::DPRCS2:
1315 // FPCXT and DPRCS2 are not represented in the DWARF info.
1316 break;
1317 }
1318
1319 if (CFIPos.isValid()) {
1321 .buildOffset(Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg,
1322 MFI.getObjectOffset(FI));
1323 }
1324 }
1325 }
1326
1327 // Now we can emit descriptions of where the canonical frame address was
1328 // throughout the process. If we have a frame pointer, it takes over the job
1329 // half-way through, so only the first few .cfi_def_cfa_offset instructions
1330 // actually get emitted.
1331 if (!NeedsWinCFI) {
1332 LLVM_DEBUG(DefCFAOffsetCandidates.dump());
1333 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
1334 }
1335
1336 if (STI.isTargetELF() && hasFP(MF))
1338 AFI->getFramePtrSpillOffset());
1339
1340 AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
1341 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
1342 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
1343 AFI->setFPStatusSavesSize(FPStatusSize);
1344 AFI->setDPRCalleeSavedGapSize(DPRGapSize);
1345 AFI->setDPRCalleeSavedArea1Size(DPRCS1Size);
1346 AFI->setGPRCalleeSavedArea3Size(GPRCS3Size);
1347
1348 // If we need dynamic stack realignment, do it here. Be paranoid and make
1349 // sure if we also have VLAs, we have a base pointer for frame access.
1350 // If aligned NEON registers were spilled, the stack has already been
1351 // realigned.
1352 if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
1353 Align MaxAlign = MFI.getMaxAlign();
1355 if (!AFI->isThumbFunction()) {
1356 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
1357 false);
1358 } else {
1359 // We cannot use sp as source/dest register here, thus we're using r4 to
1360 // perform the calculations. We're emitting the following sequence:
1361 // mov r4, sp
1362 // -- use emitAligningInstructions to produce best sequence to zero
1363 // -- out lower bits in r4
1364 // mov sp, r4
1365 // FIXME: It will be better just to find spare register here.
1366 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
1367 .addReg(ARM::SP, RegState::Kill)
1369 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
1370 false);
1371 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1372 .addReg(ARM::R4, RegState::Kill)
1374 }
1375
1376 AFI->setShouldRestoreSPFromFP(true);
1377 }
1378
1379 // If we need a base pointer, set it up here. It's whatever the value
1380 // of the stack pointer is at this point. Any variable size objects
1381 // will be allocated after this, so we can still use the base pointer
1382 // to reference locals.
1383 // FIXME: Clarify FrameSetup flags here.
1384 if (RegInfo->hasBasePointer(MF) && !MBB.isEHFuncletEntry()) {
1385 if (isARM)
1386 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
1387 .addReg(ARM::SP)
1389 .add(condCodeOp());
1390 else
1391 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
1392 .addReg(ARM::SP)
1394 }
1395
1396 // If the frame has variable sized objects then the epilogue must restore
1397 // the sp from fp. We can assume there's an FP here since hasFP already
1398 // checks for hasVarSizedObjects.
1399 if (MFI.hasVarSizedObjects())
1400 AFI->setShouldRestoreSPFromFP(true);
1401}
1402
1404 MachineBasicBlock &MBB) const {
     // Emit the function epilogue into MBB for ARM and Thumb2 functions
     // (Thumb1-only functions are rejected by the assert below). SP
     // adjustments are inserted around the existing restore instructions,
     // which MBBI is stepped over one save area at a time.
1405 MachineFrameInfo &MFI = MF.getFrameInfo();
1407 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
1408 const ARMBaseInstrInfo &TII =
1409 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1410 assert(!AFI->isThumb1OnlyFunction() &&
1411 "This emitEpilogue does not support Thumb1!");
1412 bool isARM = !AFI->isThumbFunction();
1414 STI.getPushPopSplitVariation(MF);
1415
1416 LLVM_DEBUG(dbgs() << "Emitting epilogue for " << MF.getName() << "\n");
1417
1418 // Amount of stack space we reserved next to incoming args for either
1419 // varargs registers or stack arguments in tail calls made by this function.
1420 unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
1421
1422 // How much of the stack used by incoming arguments this function is expected
1423 // to restore in this particular epilogue.
1424 int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
1425 int NumBytes = (int)MFI.getStackSize();
1426 Register FramePtr = RegInfo->getFrameRegister(MF);
1427
1428 // All calls are tail calls in GHC calling conv, and functions have no
1429 // prologue/epilogue.
1431 return;
1432
1433 // First put ourselves on the first (from top) terminator instructions.
1434 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1435 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
1436
1437 MachineBasicBlock::iterator RangeStart;
     // Without a stack frame, the only code emitted is a single SP update by
     // NumBytes + IncomingArgStackToRestore (when that sum is nonzero), plus
     // the SEH epilogue markers when the function has WinCFI.
1438 if (!AFI->hasStackFrame()) {
1439 if (MF.hasWinCFI()) {
1440 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1442 RangeStart = initMBBRange(MBB, MBBI);
1443 }
1444
1445 if (NumBytes + IncomingArgStackToRestore != 0)
1446 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1447 NumBytes + IncomingArgStackToRestore,
1449 } else {
1450 // Unwind MBBI to point to first LDR / VLDRD.
1451 if (MBBI != MBB.begin()) {
1452 do {
1453 --MBBI;
1454 } while (MBBI != MBB.begin() &&
1456 if (!MBBI->getFlag(MachineInstr::FrameDestroy))
1457 ++MBBI;
1458 }
1459
1460 if (MF.hasWinCFI()) {
1461 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1463 RangeStart = initMBBRange(MBB, MBBI);
1464 }
1465
1466 // Move SP to start of FP callee save spill area.
1467 NumBytes -=
1468 (ReservedArgStack + AFI->getFPCXTSaveAreaSize() +
1472
1473 // Reset SP based on frame pointer only if the stack frame extends beyond
1474 // frame pointer stack slot or target is ELF and the function has FP.
1475 if (AFI->shouldRestoreSPFromFP()) {
     // Rebase NumBytes on the frame pointer: when it is nonzero, SP is
     // recomputed below as FramePtr + (-NumBytes), directly with
     // emitARMRegPlusImmediate in ARM mode, and through scratch register R4
     // followed by a tMOVr in Thumb2 mode.
1476 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
1477 if (NumBytes) {
1478 if (isARM)
1479 emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
1480 ARMCC::AL, 0, TII,
1482 else {
1483 // It's not possible to restore SP from FP in a single instruction.
1484 // For iOS, this looks like:
1485 // mov sp, r7
1486 // sub sp, #24
1487 // This is bad, if an interrupt is taken after the mov, sp is in an
1488 // inconsistent state.
1489 // Use the first callee-saved register as a scratch register.
1490 assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
1491 "No scratch register to restore SP from FP!");
1492 emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
1494 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1495 .addReg(ARM::R4)
1498 }
1499 } else {
1500 // Thumb2 or ARM.
1501 if (isARM)
1502 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
1505 .add(condCodeOp())
1507 else
1508 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1512 }
     // Not restoring from FP: try to fold the remaining SP adjustment into the
     // instruction at MBBI, and fall back to an explicit SP update of NumBytes
     // when that is not possible.
1513 } else if (NumBytes &&
1514 !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
1515 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
1517
1518 // Increment past our save areas.
     // The areas are stepped over in this order: GPRCS3, DPRCS1 (possibly
     // several VLDMDIA_UPD instructions), the 4-byte DPR alignment gap (undone
     // with an explicit SP update rather than by skipping an instruction),
     // GPRCS2, and finally GPRCS1.
1519 if (AFI->getGPRCalleeSavedArea3Size()) {
1521 (void)PushPopSplit;
1522 MBBI++;
1523 }
1524
1525 if (MBBI != MBB.end() && AFI->getDPRCalleeSavedArea1Size()) {
1526 MBBI++;
1527 // Since vpop register list cannot have gaps, there may be multiple vpop
1528 // instructions in the epilogue.
1529 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
1530 MBBI++;
1531 }
1532 if (AFI->getDPRCalleeSavedGapSize()) {
1533 assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
1534 "unexpected DPR alignment gap");
1535 emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
1537 }
1538
1539 if (AFI->getGPRCalleeSavedArea2Size()) {
1541 (void)PushPopSplit;
1542 MBBI++;
1543 }
1544 if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
1545
1546 if (ReservedArgStack || IncomingArgStackToRestore) {
1547 assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
1548 "attempting to restore negative stack amount");
1549 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1550 ReservedArgStack + IncomingArgStackToRestore,
1552 }
1553
1554 // Validate PAC. It should have been already popped into R12. For CMSE entry
1555 // function, the validation instruction is emitted during expansion of the
1556 // tBXNS_RET, since the validation must use the value of SP at function
1557 // entry, before saving, resp. after restoring, FPCXTNS.
1558 if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction()) {
1559 bool CanUseBXAut =
1560 STI.isThumb() && STI.hasV8_1MMainlineOps() && STI.hasPACBTI();
1561 auto TMBBI = MBB.getFirstTerminator();
1562 bool IsBXReturn =
1563 TMBBI != MBB.end() && TMBBI->getOpcode() == ARM::tBX_RET;
     // When the first terminator is tBX_RET and the subtarget checks above
     // pass, retarget that terminator in place to t2BXAUT_RET; otherwise
     // insert a standalone t2AUT at MBBI.
1564 if (IsBXReturn && CanUseBXAut)
1565 TMBBI->setDesc(STI.getInstrInfo()->get(ARM::t2BXAUT_RET));
1566 else
1567 BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
1568 }
1569 }
1570
     // For WinCFI functions, hand the instructions from RangeStart to the end
     // of the block to insertSEHRange, then close the epilogue with
     // SEH_EpilogEnd.
1571 if (MF.hasWinCFI()) {
1572 insertSEHRange(MBB, RangeStart, MBB.end(), TII, MachineInstr::FrameDestroy);
1573 BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
1575 }
1576}
1577
1578/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1579/// debug info. It's the same as what we use for resolving the code-gen
1580/// references for now. FIXME: This can go wrong when references are
1581/// SP-relative and simple call frames aren't used.
1583 int FI,
1584 Register &FrameReg) const {
     // Forward to ResolveFrameIndexReference with an SP adjustment of 0 and
     // wrap the resulting byte offset as a fixed StackOffset. FrameReg is
     // filled in by that call.
1585 return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
1586}
1587
1590 int FI) const {
1591 const MachineFrameInfo &MFI = MF.getFrameInfo();
1592 int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
1594}
1595
1597 int FI, Register &FrameReg,
1598 int SPAdj) const {
     // Choose the register used to address frame index FI (SP, the frame
     // pointer, or the base pointer), store it in FrameReg, and return the
     // byte offset of the slot from that register. SPAdj is only reflected in
     // offsets that are returned relative to SP.
1599 const MachineFrameInfo &MFI = MF.getFrameInfo();
1600 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1601 MF.getSubtarget().getRegisterInfo());
1602 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
     // Offset: the slot's object offset rebased by the total stack size.
     // FPOffset: the same slot expressed relative to the frame pointer spill
     // offset recorded in AFI. It is computed before SPAdj is applied, so it
     // never includes SPAdj.
1603 int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
1604 int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1605 bool isFixed = MFI.isFixedObjectIndex(FI);
1606
     // Default answer: SP-relative, including the current SP adjustment.
1607 FrameReg = ARM::SP;
1608 Offset += SPAdj;
1609
1610 // SP can move around if there are allocas. We may also lose track of SP
1611 // when emergency spilling inside a non-reserved call frame setup.
1612 bool hasMovingSP = !hasReservedCallFrame(MF);
1613
1614 // When dynamically realigning the stack, use the frame pointer for
1615 // parameters, and the stack/base pointer for locals.
1616 if (RegInfo->hasStackRealignment(MF)) {
1617 assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1618 if (isFixed) {
1619 FrameReg = RegInfo->getFrameRegister(MF);
1620 Offset = FPOffset;
1621 } else if (hasMovingSP) {
1622 assert(RegInfo->hasBasePointer(MF) &&
1623 "VLAs and dynamic stack alignment, but missing base pointer!");
1624 FrameReg = RegInfo->getBaseRegister();
     // Base-pointer-relative offsets must not include SPAdj; undo the
     // addition made above.
1625 Offset -= SPAdj;
1626 }
1627 return Offset;
1628 }
1629
1630 // If there is a frame pointer, use it when we can.
1631 if (hasFP(MF) && AFI->hasStackFrame()) {
1632 // Use frame pointer to reference fixed objects. Use it for locals if
1633 // there are VLAs (and thus the SP isn't reliable as a base).
1634 if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1635 FrameReg = RegInfo->getFrameRegister(MF);
1636 return FPOffset;
1637 } else if (hasMovingSP) {
1638 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
     // If this is not a Thumb2 function, or FPOffset lies outside
     // [-255, -1], nothing is returned here and control falls through to
     // the base-pointer path at the end of the function.
1639 if (AFI->isThumb2Function()) {
1640 // Try to use the frame pointer if we can, else use the base pointer
1641 // since it's available. This is handy for the emergency spill slot, in
1642 // particular.
1643 if (FPOffset >= -255 && FPOffset < 0) {
1644 FrameReg = RegInfo->getFrameRegister(MF);
1645 return FPOffset;
1646 }
1647 }
1648 } else if (AFI->isThumbFunction()) {
1649 // Prefer SP to base pointer, if the offset is suitably aligned and in
1650 // range as the effective range of the immediate offset is bigger when
1651 // basing off SP.
1652 // Use add <rd>, sp, #<imm8>
1653 // ldr <rd>, [sp, #<imm8>]
1654 if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1655 return Offset;
1656 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1657 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1658 if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1659 FrameReg = RegInfo->getFrameRegister(MF);
1660 return FPOffset;
1661 }
     // The condition below is Offset > |FPOffset|.
1662 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1663 // Otherwise, use SP or FP, whichever is closer to the stack slot.
1664 FrameReg = RegInfo->getFrameRegister(MF);
1665 return FPOffset;
1666 }
1667 }
1668 // Use the base pointer if we have one.
1669 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1670 // That can happen if we forced a base pointer for a large call frame.
1671 if (RegInfo->hasBasePointer(MF)) {
1672 FrameReg = RegInfo->getBaseRegister();
1673 Offset -= SPAdj;
1674 }
1675 return Offset;
1676}
1677
/// Emit the push instruction(s) for the callee-saved registers in CSI that
/// Func accepts. Registers are gathered walking CSI from its last entry to
/// its first; each gathered group is sorted by encoding value and emitted
/// with StmOpc, or with StrOpc when the group holds exactly one register and
/// StrOpc is nonzero. When NoGap is set, a group ends as soon as the next
/// register is not LastReg + 1, so several instructions may be emitted.
1678void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
1681 unsigned StmOpc, unsigned StrOpc,
1682 bool NoGap,
1683 function_ref<bool(unsigned)> Func) const {
1684 MachineFunction &MF = *MBB.getParent();
1687
1688 DebugLoc DL;
1689
1690 using RegAndKill = std::pair<unsigned, bool>;
1691
     // i counts down over CSI; each pass of the outer loop builds one group in
     // Regs and emits one instruction for it.
1693 unsigned i = CSI.size();
1694 while (i != 0) {
1695 unsigned LastReg = 0;
1696 for (; i != 0; --i) {
1697 MCRegister Reg = CSI[i-1].getReg();
1698 if (!Func(Reg))
1699 continue;
1700
     // Registers that are neither function live-ins nor reserved are added
     // as live-ins of MBB.
1701 const MachineRegisterInfo &MRI = MF.getRegInfo();
1702 bool isLiveIn = MRI.isLiveIn(Reg);
1703 if (!isLiveIn && !MRI.isReserved(Reg))
1704 MBB.addLiveIn(Reg);
1705 // If NoGap is true, push consecutive registers and then leave the rest
1706 // for other instructions. e.g.
1707 // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
     // The break leaves i untouched, so the register that ended the run is
     // revisited and starts the next group.
1708 if (NoGap && LastReg && LastReg != Reg-1)
1709 break;
1710 LastReg = Reg;
1711 // Do not set a kill flag on values that are also marked as live-in. This
1712 // happens with the @llvm-returnaddress intrinsic and with arguments
1713 // passed in callee saved registers.
1714 // Omitting the kill flags is conservatively correct even if the live-in
1715 // is not used after all.
1716 Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1717 }
1718
     // An empty group can only happen once i has reached 0 (the break above
     // requires a previously collected register), so this also ends the
     // outer loop.
1719 if (Regs.empty())
1720 continue;
1721
1722 llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1723 return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1724 });
1725
     // Multi-register form for groups of two or more, or whenever no
     // single-register opcode was supplied; otherwise the single-register
     // form with an immediate of -4.
1726 if (Regs.size() > 1 || StrOpc== 0) {
1727 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1728 .addReg(ARM::SP)
1731 for (const auto &[Reg, Kill] : Regs)
1733 } else if (Regs.size() == 1) {
1734 BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1735 .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1736 .addReg(ARM::SP)
1738 .addImm(-4)
1740 }
1741 Regs.clear();
1742
1743 // Put any subsequent vpush instructions before this one: they will refer to
1744 // higher register numbers so need to be pushed first in order to preserve
1745 // monotonicity.
1746 if (MI != MBB.begin())
1747 --MI;
1748 }
1749}
1750
/// Emit the pop instruction(s) that restore the callee-saved registers in CSI
/// that Func accepts, at MI. Registers are gathered walking CSI from its last
/// entry to its first; each group is sorted by encoding value and restored
/// with LdmOpc, or with LdrOpc when the group holds exactly one register and
/// LdrOpc is nonzero. When NoGap is set, a group ends as soon as the next
/// register is not LastReg + 1.
///
/// When LR is restored and the conditions checked inside the loop hold, LR is
/// replaced by PC and LdmOpc by the *_RET form so that the multi-register
/// load also performs the return; the instruction at MI is then erased after
/// its implicit operands have been copied.
1751void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1754 unsigned LdmOpc, unsigned LdrOpc,
1755 bool isVarArg, bool NoGap,
1756 function_ref<bool(unsigned)> Func) const {
1757 MachineFunction &MF = *MBB.getParent();
1758 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1759 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1760 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1761 bool hasPAC = AFI->shouldSignReturnAddress();
1762 DebugLoc DL;
1763 bool isTailCall = false;
1764 bool isInterrupt = false;
1765 bool isTrap = false;
1766 bool isCmseEntry = false;
1768 STI.getPushPopSplitVariation(MF);
     // Classify the instruction at MI (when there is one) by opcode; each of
     // these kinds disables folding the return into the load below.
1769 if (MBB.end() != MI) {
1770 DL = MI->getDebugLoc();
1771 unsigned RetOpcode = MI->getOpcode();
1772 isTailCall =
1773 (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||
1774 RetOpcode == ARM::TCRETURNrinotr12);
1775 isInterrupt =
1776 RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1777 isTrap = RetOpcode == ARM::TRAP || RetOpcode == ARM::tTRAP;
1778 isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
1779 }
1780
1781 SmallVector<unsigned, 4> Regs;
     // i counts down over CSI; each pass of the outer loop builds one group in
     // Regs and emits one instruction for it.
1782 unsigned i = CSI.size();
1783 while (i != 0) {
1784 unsigned LastReg = 0;
1785 bool DeleteRet = false;
1786 for (; i != 0; --i) {
1787 CalleeSavedInfo &Info = CSI[i-1];
1788 MCRegister Reg = Info.getReg();
1789 if (!Func(Reg))
1790 continue;
1791
1792 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1793 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
1794 STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1795 (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
1796 PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
1797 Reg = ARM::PC;
1798 // Fold the return instruction into the LDM.
1799 DeleteRet = true;
1800 LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1801 }
1802
1803 // If NoGap is true, pop consecutive registers and then leave the rest
1804 // for other instructions. e.g.
1805 // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
     // The break leaves i untouched, so the register that ended the run is
     // revisited and starts the next group.
1806 if (NoGap && LastReg && LastReg != Reg-1)
1807 break;
1808
1809 LastReg = Reg;
1810 Regs.push_back(Reg);
1811 }
1812
     // An empty group can only happen once i has reached 0, so this also
     // ends the outer loop.
1813 if (Regs.empty())
1814 continue;
1815
1816 llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1817 return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1818 });
1819
1820 if (Regs.size() > 1 || LdrOpc == 0) {
1821 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1822 .addReg(ARM::SP)
1825 for (unsigned Reg : Regs)
1826 MIB.addReg(Reg, getDefRegState(true));
1827 if (DeleteRet) {
1828 if (MI != MBB.end()) {
1829 MIB.copyImplicitOps(*MI);
1830 MI->eraseFromParent();
1831 }
1832 }
     // Reposition MI on the instruction just built (the old MI may have been
     // erased above).
1833 MI = MIB;
1834 } else if (Regs.size() == 1) {
1835 // If we adjusted the reg to PC from LR above, switch it back here. We
1836 // only do that for LDM.
     // DeleteRet is not consulted on this path, so the instruction at MI is
     // left in place.
1837 if (Regs[0] == ARM::PC)
1838 Regs[0] = ARM::LR;
1839 MachineInstrBuilder MIB =
1840 BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1841 .addReg(ARM::SP, RegState::Define)
1842 .addReg(ARM::SP)
1844 // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1845 // that refactoring is complete (eventually).
1846 if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1847 MIB.addReg(0);
1849 } else
1850 MIB.addImm(4);
1851 MIB.add(predOps(ARMCC::AL));
1852 }
1853 Regs.clear();
1854
1855 // Put any subsequent vpop instructions after this one: they will refer to
1856 // higher register numbers so need to be popped afterwards.
1857 if (MI != MBB.end())
1858 ++MI;
1859 }
1860}
1861
/// Save the floating-point status registers listed in CSI. FPSCR, if present,
/// is read into R4 with VMRS, and FPEXC, if present, into R5 with VMRS_FPEXC;
/// the scratch registers that were filled are then pushed with a single
/// PushOpc instruction. Nothing is emitted when CSI contains neither
/// register.
1862void ARMFrameLowering::emitFPStatusSaves(MachineBasicBlock &MBB,
1865 unsigned PushOpc) const {
1866 MachineFunction &MF = *MBB.getParent();
1867 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1868
     // True if some entry of CSI refers to Reg.
1870 auto RegPresent = [&CSI](MCRegister Reg) {
1871 return llvm::any_of(CSI, [Reg](const CalleeSavedInfo &C) {
1872 return C.getReg() == Reg;
1873 });
1874 };
1875
1876 // If we need to save FPSCR, then we must move FPSCR into R4 with the VMRS
1877 // instruction.
1878 if (RegPresent(ARM::FPSCR)) {
1879 BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::VMRS), ARM::R4)
1882
1883 Regs.push_back(ARM::R4);
1884 }
1885
1886 // If we need to save FPEXC, then we must move FPEXC into R5 with the
1887 // VMRS_FPEXC instruction.
1888 if (RegPresent(ARM::FPEXC)) {
1889 BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::VMRS_FPEXC), ARM::R5)
1892
1893 Regs.push_back(ARM::R5);
1894 }
1895
1896 // If neither FPSCR nor FPEXC is present, then do nothing.
1897 if (Regs.size() == 0)
1898 return;
1899
1900 // Push both R4 and R5 onto the stack, if present.
1901 MachineInstrBuilder MIB =
1902 BuildMI(MBB, MI, DebugLoc(), TII.get(PushOpc), ARM::SP)
1903 .addReg(ARM::SP)
1906
1907 for (Register Reg : Regs) {
1908 MIB.addReg(Reg);
1909 }
1910}
1911
/// Restore the floating-point status registers listed in CSI. A single LdmOpc
/// instruction pops into R4 (when FPSCR is in CSI) and/or R5 (when FPEXC is
/// in CSI); the values are then written back with VMSR and VMSR_FPEXC
/// respectively. Nothing is emitted when CSI contains neither register.
1912void ARMFrameLowering::emitFPStatusRestores(
1914 MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc) const {
1915 MachineFunction &MF = *MBB.getParent();
1916 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1917
     // True if some entry of CSI refers to Reg.
1918 auto RegPresent = [&CSI](MCRegister Reg) {
1919 return llvm::any_of(CSI, [Reg](const CalleeSavedInfo &C) {
1920 return C.getReg() == Reg;
1921 });
1922 };
1923
1924 // Do nothing if we don't need to restore any FP status registers.
1925 if (!RegPresent(ARM::FPSCR) && !RegPresent(ARM::FPEXC))
1926 return;
1927
1928 // Pop registers off of the stack.
1929 MachineInstrBuilder MIB =
1930 BuildMI(MBB, MI, DebugLoc(), TII.get(LdmOpc), ARM::SP)
1931 .addReg(ARM::SP)
1934
1935 // If FPSCR was saved, it will be popped into R4.
1936 if (RegPresent(ARM::FPSCR)) {
1937 MIB.addReg(ARM::R4, RegState::Define);
1938 }
1939
1940 // If FPEXC was saved, it will be popped into R5.
1941 if (RegPresent(ARM::FPEXC)) {
1942 MIB.addReg(ARM::R5, RegState::Define);
1943 }
1944
1945 // Move the FPSCR value back into the register with the VMSR instruction.
1946 if (RegPresent(ARM::FPSCR)) {
1947 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VMSR))
1948 .addReg(ARM::R4)
1951 }
1952
1953 // Move the FPEXC value back into the register with the VMSR_FPEXC
1954 // instruction.
1955 if (RegPresent(ARM::FPEXC)) {
1956 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VMSR_FPEXC))
1957 .addReg(ARM::R5)
1960 }
1961}
1962
1963/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1964/// starting from d8. Also insert stack realignment code and leave the stack
1965/// pointer pointing to the d8 spill slot.
///
/// r4 is used as the scratch address register throughout and is marked as
/// killed on the last instruction emitted. As a side effect the function info
/// is told to restore SP from FP (setShouldRestoreSPFromFP(true)).
// NOTE(review): the embedded listing numbers skip at several points in this
// function (e.g. 1965 -> 1968, 1971 -> 1973, 2015 -> 2017, 2051 -> 2054), so
// the start of the signature and some declarations/operands are not visible
// in this listing. Confirm against the upstream file.
1968 unsigned NumAlignedDPRCS2Regs,
1970 const TargetRegisterInfo *TRI) {
1971 MachineFunction &MF = *MBB.getParent();
1973 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1975 MachineFrameInfo &MFI = MF.getFrameInfo();
1976
1977 // Mark the D-register spill slots as properly aligned. Since MFI computes
1978 // stack slot layout backwards, this can actually mean that the d-reg stack
1979 // slot offsets can be wrong. The offset for d8 will always be correct.
1980 for (const CalleeSavedInfo &I : CSI) {
1981 unsigned DNum = I.getReg() - ARM::D8;
// Skip entries whose index relative to d8 is not below NumAlignedDPRCS2Regs.
1982 if (DNum > NumAlignedDPRCS2Regs - 1)
1983 continue;
1984 int FI = I.getFrameIdx();
1985 // The even-numbered registers will be 16-byte aligned, the odd-numbered
1986 // registers will be 8-byte aligned.
1987 MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
1988
1989 // The stack slot for D8 needs to be maximally aligned because this is
1990 // actually the point where we align the stack pointer. MachineFrameInfo
1991 // computes all offsets relative to the incoming stack pointer which is a
1992 // bit weird when realigning the stack. Any extra padding for this
1993 // over-alignment is not realized because the code inserted below adjusts
1994 // the stack pointer by numregs * 8 before aligning the stack pointer.
1995 if (DNum == 0)
1996 MFI.setObjectAlignment(FI, MFI.getMaxAlign());
1997 }
1998
1999 // Move the stack pointer to the d8 spill slot, and align it at the same
2000 // time. Leave the stack slot address in the scratch register r4.
2001 //
2002 // sub r4, sp, #numregs * 8
2003 // bic r4, r4, #align - 1
2004 // mov sp, r4
2005 //
2006 bool isThumb = AFI->isThumbFunction();
2007 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
2008 AFI->setShouldRestoreSPFromFP(true);
2009
2010 // sub r4, sp, #numregs * 8
2011 // The immediate is <= 64, so it doesn't need any special encoding.
2012 unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
2013 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
2014 .addReg(ARM::SP)
2015 .addImm(8 * NumAlignedDPRCS2Regs)
2017 .add(condCodeOp());
2018
2019 Align MaxAlign = MF.getFrameInfo().getMaxAlign();
2020 // We must set parameter MustBeSingleInstruction to true, since
2021 // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
2022 // stack alignment. Luckily, this can always be done since all ARM
2023 // architecture versions that support Neon also support the BFC
2024 // instruction.
2025 emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
2026
2027 // mov sp, r4
2028 // The stack pointer must be adjusted before spilling anything, otherwise
2029 // the stack slots could be clobbered by an interrupt handler.
2030 // Leave r4 live, it is used below.
2031 Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
2032 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
2033 .addReg(ARM::R4)
2035 if (!isThumb)
2036 MIB.add(condCodeOp());
2037
2038 // Now spill NumAlignedDPRCS2Regs registers starting from d8.
2039 // r4 holds the stack slot address.
// From here on NumAlignedDPRCS2Regs is counted down as registers are stored,
// and NextReg tracks the first register that has not been stored yet.
2040 unsigned NextReg = ARM::D8;
2041
2042 // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
2043 // The writeback is only needed when emitting two vst1.64 instructions.
2044 if (NumAlignedDPRCS2Regs >= 6) {
2045 MCRegister SupReg =
2046 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2047 MBB.addLiveIn(SupReg);
2048 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
2049 .addReg(ARM::R4, RegState::Kill)
2050 .addImm(16)
2051 .addReg(NextReg)
2054 NextReg += 4;
2055 NumAlignedDPRCS2Regs -= 4;
2056 }
2057
2058 // We won't modify r4 beyond this point. It currently points to the next
2059 // register to be spilled.
2060 unsigned R4BaseReg = NextReg;
2061
2062 // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
2063 if (NumAlignedDPRCS2Regs >= 4) {
2064 MCRegister SupReg =
2065 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2066 MBB.addLiveIn(SupReg);
2067 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
2068 .addReg(ARM::R4)
2069 .addImm(16)
2070 .addReg(NextReg)
2073 NextReg += 4;
2074 NumAlignedDPRCS2Regs -= 4;
2075 }
2076
2077 // 16-byte aligned vst1.64 with 2 d-regs.
2078 if (NumAlignedDPRCS2Regs >= 2) {
2079 MCRegister SupReg =
2080 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QPRRegClass);
2081 MBB.addLiveIn(SupReg);
2082 BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
2083 .addReg(ARM::R4)
2084 .addImm(16)
2085 .addReg(SupReg)
2087 NextReg += 2;
2088 NumAlignedDPRCS2Regs -= 2;
2089 }
2090
2091 // Finally, use a vanilla vstr.64 for the odd last register.
2092 if (NumAlignedDPRCS2Regs) {
2093 MBB.addLiveIn(NextReg);
2094 // vstr.64 uses addrmode5 which has an offset scale of 4.
// (NextReg - R4BaseReg) is the number of d-registers already stored relative
// to r4; each takes 8 bytes, i.e. 2 units of the scaled offset.
2095 BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
2096 .addReg(NextReg)
2097 .addReg(ARM::R4)
2098 .addImm((NextReg - R4BaseReg) * 2)
2100 }
2101
2102 // The last spill instruction inserted should kill the scratch register r4.
2103 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
2104}
2105
2106/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
2107/// iterator to the following instruction.
// NOTE(review): the listing numbers jump 2107 -> 2110, so the first part of
// the signature is not visible in this listing.
2110 unsigned NumAlignedDPRCS2Regs) {
2111 // sub r4, sp, #numregs * 8
2112 // bic r4, r4, #align - 1
2113 // mov sp, r4
2114 ++MI; ++MI; ++MI;
2115 assert(MI->mayStore() && "Expecting spill instruction");
2116
2117 // The cases below deliberately fall through. MI already points at the first
// spill instruction; every additional ++MI steps onto one more spill. Case 7
// therefore walks over three spill instructions, the default case over two,
// and cases 1, 2 and 4 over one. The last spill visited must carry the r4
// kill flag, and the final ++MI moves past it.
2118 switch(NumAlignedDPRCS2Regs) {
2119 case 7:
2120 ++MI;
2121 assert(MI->mayStore() && "Expecting spill instruction");
2122 [[fallthrough]];
2123 default:
2124 ++MI;
2125 assert(MI->mayStore() && "Expecting spill instruction");
2126 [[fallthrough]];
2127 case 1:
2128 case 2:
2129 case 4:
2130 assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
2131 ++MI;
2132 }
2133 return MI;
2134}
2135
2136/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
2137/// starting from d8. These instructions are assumed to execute while the
2138/// stack is still aligned, unlike the code inserted by emitPopInst.
///
/// r4 is used as the scratch address register and is marked as killed on the
/// last instruction emitted.
// NOTE(review): the embedded listing numbers skip at several points in this
// function (e.g. 2138 -> 2141, 2168 -> 2170, 2182 -> 2185, 2222 -> 2224), so
// the start of the signature and some declarations/operands are not visible
// in this listing. Confirm against the upstream file.
2141 unsigned NumAlignedDPRCS2Regs,
2143 const TargetRegisterInfo *TRI) {
2144 MachineFunction &MF = *MBB.getParent();
2146 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
2148
2149 // Find the frame index assigned to d8.
// D8SpillFI keeps its initial value 0 if CSI has no entry for d8.
2150 int D8SpillFI = 0;
2151 for (const CalleeSavedInfo &I : CSI)
2152 if (I.getReg() == ARM::D8) {
2153 D8SpillFI = I.getFrameIdx();
2154 break;
2155 }
2156
2157 // Materialize the address of the d8 spill slot into the scratch register r4.
2158 // This can be fairly complicated if the stack frame is large, so just use
2159 // the normal frame index elimination mechanism to do it. This code runs as
2160 // the initial part of the epilog where the stack and base pointers haven't
2161 // been changed yet.
2162 bool isThumb = AFI->isThumbFunction();
2163 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
2164
2165 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
2166 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
2167 .addFrameIndex(D8SpillFI)
2168 .addImm(0)
2170 .add(condCodeOp());
2171
2172 // Now restore NumAlignedDPRCS2Regs registers starting from d8.
// From here on NumAlignedDPRCS2Regs is counted down as registers are loaded,
// and NextReg tracks the first register that has not been reloaded yet.
2173 unsigned NextReg = ARM::D8;
2174
2175 // 16-byte aligned vld1.64 with 4 d-regs and writeback.
2176 if (NumAlignedDPRCS2Regs >= 6) {
2177 MCRegister SupReg =
2178 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2179 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
2180 .addReg(ARM::R4, RegState::Define)
2181 .addReg(ARM::R4, RegState::Kill)
2182 .addImm(16)
2185 NextReg += 4;
2186 NumAlignedDPRCS2Regs -= 4;
2187 }
2188
2189 // We won't modify r4 beyond this point. It currently points to the next
2190 // register to be restored.
2191 unsigned R4BaseReg = NextReg;
2192
2193 // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
2194 if (NumAlignedDPRCS2Regs >= 4) {
2195 MCRegister SupReg =
2196 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2197 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
2198 .addReg(ARM::R4)
2199 .addImm(16)
2202 NextReg += 4;
2203 NumAlignedDPRCS2Regs -= 4;
2204 }
2205
2206 // 16-byte aligned vld1.64 with 2 d-regs.
2207 if (NumAlignedDPRCS2Regs >= 2) {
2208 MCRegister SupReg =
2209 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QPRRegClass);
2210 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
2211 .addReg(ARM::R4)
2212 .addImm(16)
2214 NextReg += 2;
2215 NumAlignedDPRCS2Regs -= 2;
2216 }
2217
2218 // Finally, use a vanilla vldr.64 for the remaining odd register.
// The immediate counts the d-registers already reloaded relative to r4, two
// offset units per register (same scaling as the vstr.64 on the spill side).
2219 if (NumAlignedDPRCS2Regs)
2220 BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
2221 .addReg(ARM::R4)
2222 .addImm(2 * (NextReg - R4BaseReg))
2224
2225 // The last reload instruction inserted kills r4.
2226 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
2227}
2228
// NOTE(review): the signature of this function (listing lines 2229-2231) is
// not visible in this listing, and a few later lines are skipped as well
// (2236-2237, 2249, 2259). From the body this is the callee-saved register
// spill routine, presumably ARMFrameLowering::spillCalleeSavedRegisters.
// Confirm against the upstream file.
//
// Returns false without emitting anything when CSI is empty, true otherwise.
2232 if (CSI.empty())
2233 return false;
2234
2235 MachineFunction &MF = *MBB.getParent();
2238 STI.getPushPopSplitVariation(MF);
2239 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
2240
2241 unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
2242 unsigned PushOneOpc = AFI->isThumbFunction() ?
2243 ARM::t2STR_PRE : ARM::STR_PRE_IMM;
2244 unsigned FltOpc = ARM::VSTMDDB_UPD;
2245 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2246 // Compute PAC in R12.
2247 if (AFI->shouldSignReturnAddress()) {
2248 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
2250 }
2251 // Save the non-secure floating point context.
2252 if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
2253 return C.getReg() == ARM::FPCXTNS;
2254 })) {
2255 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
2256 ARM::SP)
2257 .addReg(ARM::SP)
2258 .addImm(-4)
2260 }
2261
// Predicates that classify a register by the spill area getSpillArea assigns
// to it; each emitPushInst call below handles exactly one area.
2262 auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
2263 RegInfo](unsigned Reg, SpillArea TestArea) {
2264 return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
2265 TestArea;
2266 };
2267 auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
2268 return CheckRegArea(Reg, SpillArea::GPRCS1);
2269 };
2270 auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
2271 return CheckRegArea(Reg, SpillArea::GPRCS2);
2272 };
2273 auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
2274 return CheckRegArea(Reg, SpillArea::DPRCS1);
2275 };
2276 auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
2277 return CheckRegArea(Reg, SpillArea::GPRCS3);
2278 };
2279
// Push order: GPRCS1, GPRCS2, FP status registers, DPRCS1, GPRCS3.
2280 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS1);
2281 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS2);
2282 emitFPStatusSaves(MBB, MI, CSI, PushOpc);
2283 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, IsDPRCS1);
2284 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS3);
2285
2286 // The code above does not insert spill code for the aligned DPRCS2 registers.
2287 // The stack realignment code will be inserted between the push instructions
2288 // and these spills.
2289 if (NumAlignedDPRCS2Regs)
2290 emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2291
2292 return true;
2293}
2294
// NOTE(review): the signature of this function (listing lines 2295-2297) is
// not visible in this listing, and lines 2302 and 2307 are skipped as well.
// From the body this is the callee-saved register reload routine, presumably
// ARMFrameLowering::restoreCalleeSavedRegisters. Confirm against the upstream
// file.
//
// Returns false without emitting anything when CSI is empty, true otherwise.
2298 if (CSI.empty())
2299 return false;
2300
2301 MachineFunction &MF = *MBB.getParent();
2303 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
2304
// A non-zero argument-register save area is what marks the function as
// variadic for the purposes of the emitPopInst calls below.
2305 bool isVarArg = AFI->getArgRegsSaveSize() > 0;
2306 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2308 STI.getPushPopSplitVariation(MF);
2309
2310 // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
2311 // registers. Do that here instead.
2312 if (NumAlignedDPRCS2Regs)
2313 emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2314
2315 unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
2316 unsigned LdrOpc =
2317 AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
2318 unsigned FltOpc = ARM::VLDMDIA_UPD;
2319
// Predicates that classify a register by the spill area getSpillArea assigns
// to it; each emitPopInst call below handles exactly one area.
2320 auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
2321 RegInfo](unsigned Reg, SpillArea TestArea) {
2322 return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
2323 TestArea;
2324 };
2325 auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
2326 return CheckRegArea(Reg, SpillArea::GPRCS1);
2327 };
2328 auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
2329 return CheckRegArea(Reg, SpillArea::GPRCS2);
2330 };
2331 auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
2332 return CheckRegArea(Reg, SpillArea::DPRCS1);
2333 };
2334 auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
2335 return CheckRegArea(Reg, SpillArea::GPRCS3);
2336 };
2337
// Pop order: GPRCS3, DPRCS1, FP status registers, GPRCS2, GPRCS1.
2338 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS3);
2339 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, IsDPRCS1);
2340 emitFPStatusRestores(MBB, MI, CSI, PopOpc);
2341 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS2);
2342 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS1);
2343
2344 return true;
2345}
2346
2347// FIXME: Make generic?
// Returns an estimate of the size of MF in bytes: the sum of
// TII.getInstSizeInBytes over every instruction, plus 4 bytes for each
// jump-table entry and 4 bytes for each constant-pool entry.
// NOTE(review): listing line 2348 (the first part of the signature) is not
// visible in this listing.
2349 const ARMBaseInstrInfo &TII) {
2350 unsigned FnSize = 0;
2351 for (auto &MBB : MF) {
2352 for (auto &MI : MBB)
2353 FnSize += TII.getInstSizeInBytes(MI);
2354 }
// Jump tables are optional; getJumpTableInfo() is checked before use.
2355 if (MF.getJumpTableInfo())
2356 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2357 FnSize += Table.MBBs.size() * 4;
2358 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2359 LLVM_DEBUG(dbgs() << "Estimated function size for " << MF.getName() << " = "
2360 << FnSize << " bytes\n");
2361 return FnSize;
2362}
2363
2364/// estimateRSStackSizeLimit - Look at each instruction that references stack
2365/// frames and return the stack size limit beyond which some of these
2366/// instructions will require a scratch register during their expansion later.
///
/// The limit starts at 4095 (12 bits) and is only ever lowered. 0 is returned
/// as soon as an AddrMode4 or AddrMode6 instruction with a frame-index
/// operand is seen. HasNonSPFrameIndex is set to true when a frame-index
/// operand's register class does not contain SP; it is never cleared here.
2367// FIXME: Move to TII?
// NOTE(review): listing line 2368 (the first part of the signature) is not
// visible in this listing.
2369 const TargetFrameLowering *TFI,
2370 bool &HasNonSPFrameIndex) {
2371 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2372 const ARMBaseInstrInfo &TII =
2373 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2374 unsigned Limit = (1 << 12) - 1;
2375 for (auto &MBB : MF) {
2376 for (auto &MI : MBB) {
2377 if (MI.isDebugInstr())
2378 continue;
2379 if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE)
2380 continue;
2381 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2382 if (!MI.getOperand(i).isFI())
2383 continue;
2384
2385 // When using ADDri to get the address of a stack object, 255 is the
2386 // largest offset guaranteed to fit in the immediate offset.
2387 if (MI.getOpcode() == ARM::ADDri) {
2388 Limit = std::min(Limit, (1U << 8) - 1);
2389 break;
2390 }
2391 // t2ADDri will not require an extra register, it can reuse the
2392 // destination.
2393 if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
2394 break;
2395
2396 const MCInstrDesc &MCID = MI.getDesc();
2397 const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i);
2398 if (RegClass && !RegClass->contains(ARM::SP))
2399 HasNonSPFrameIndex = true;
2400
2401 // Otherwise check the addressing mode.
// NOTE(review): several `case` labels of this switch are missing from the
// listing (numbers 2403, 2408, 2411, 2415-2416, 2419, 2430, 2433 and 2436 are
// skipped), so not every `Limit = ...` line below can be attributed to an
// addressing mode from this text alone. Confirm against the upstream file.
2402 switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
2404 case ARMII::AddrMode2:
2405 // Default 12 bit limit.
2406 break;
2407 case ARMII::AddrMode3:
2409 Limit = std::min(Limit, (1U << 8) - 1);
2410 break;
2412 Limit = std::min(Limit, ((1U << 8) - 1) * 2);
2413 break;
2414 case ARMII::AddrMode5:
2417 Limit = std::min(Limit, ((1U << 8) - 1) * 4);
2418 break;
2420 // i12 supports only positive offset so these will be converted to
2421 // i8 opcodes. See llvm::rewriteT2FrameIndex.
2422 if (TFI->hasFP(MF) && AFI->hasStackFrame())
2423 Limit = std::min(Limit, (1U << 8) - 1);
2424 break;
2425 case ARMII::AddrMode4:
2426 case ARMII::AddrMode6:
2427 // Addressing modes 4 & 6 (load/store) instructions can't encode an
2428 // immediate offset for stack references.
2429 return 0;
2431 Limit = std::min(Limit, ((1U << 7) - 1) * 1);
2432 break;
2434 Limit = std::min(Limit, ((1U << 7) - 1) * 2);
2435 break;
2437 Limit = std::min(Limit, ((1U << 7) - 1) * 4);
2438 break;
2439 default:
2440 llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
2441 }
2442 break; // At most one FI per instruction
2443 }
2444 }
2445 }
2446
2447 return Limit;
2448}
2449
2450// In functions that realign the stack, it can be an advantage to spill the
2451// callee-saved vector registers after realigning the stack. The vst1 and vld1
2452// instructions take alignment hints that can improve performance.
//
// The aligned-register count is first reset to 0, so every early return
// below leaves it at 0. Only when all checks pass is the count recorded and
// r4 added to SavedRegs as the scratch register.
// NOTE(review): listing lines 2454, 2456, 2468 and 2473 are not visible here
// (the rest of the signature and the conditions guarding three of the early
// returns). Confirm against the upstream file.
2453static void
2455 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2457 return;
2458
2459 // Naked functions don't spill callee-saved registers.
2460 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
2461 return;
2462
2463 // We are planning to use NEON instructions vst1 / vld1.
2464 if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2465 return;
2466
2467 // Don't bother if the default stack alignment is sufficiently high.
2469 return;
2470
2471 // Aligned spills require stack realignment.
2472 if (!static_cast<const ARMBaseRegisterInfo *>(
2474 return;
2475
2476 // We always spill contiguous d-registers starting from d8. Count how many
2477 // need spilling. The register allocator will almost always use the
2478 // callee-saved registers in order, but it can happen that there are holes in
2479 // the range. Registers above the hole will be spilled to the standard DPRCS
2480 // area.
// At most 8 registers (d8 upwards) are considered.
2481 unsigned NumSpills = 0;
2482 for (; NumSpills < 8; ++NumSpills)
2483 if (!SavedRegs.test(ARM::D8 + NumSpills))
2484 break;
2485
2486 // Don't do this for just one d-register. It's not worth it.
2487 if (NumSpills < 2)
2488 return;
2489
2490 // Spill the first NumSpills D-registers after realigning the stack.
2491 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2492
2493 // A scratch register is required for the vst1 / vld1 instructions.
2494 SavedRegs.set(ARM::R4);
2495}
2496
// NOTE(review): the signature (listing line 2497) and parts of both
// conditions (lines 2501 and 2507) are not visible in this listing. From the
// comments this is the predicate that decides whether shrink-wrapping is
// allowed, presumably ARMFrameLowering::enableShrinkWrapping. Confirm against
// the upstream file.
//
// Returns false in the two cases described below, true otherwise.
2498 // For CMSE entry functions, we want to save the FPCXT_NS immediately
2499 // upon function entry (resp. restore it immediately before return)
2500 if (STI.hasV8_1MMainlineOps() &&
2502 return false;
2503
2504 // We are disabling shrinkwrapping for now when PAC is enabled, as
2505 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2506 // generated. A follow-up patch will fix this in a more performant manner.
2508 true /* SpillsLR */))
2509 return false;
2510
2511 return true;
2512}
2513
// Returns true when the subtarget asks for an AAPCS frame chain
// (createAAPCSFrameChain()) and the function has a frame pointer.
// NOTE(review): listing line 2514 (the start of the signature) is not visible
// in this listing.
2515 const MachineFunction &MF) const {
2516 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2517 return Subtarget.createAAPCSFrameChain() && hasFP(MF);
2518}
2519
2520// Thumb1 may require a spill when storing to a frame index through FP (or any
2521// access with execute-only), for cases where FP is a high register (R11). This
2522// scans the function for cases where this may happen.
//
// Returns false immediately for functions that are not Thumb1-only. Otherwise
// returns true as soon as a candidate instruction has a frame-index operand
// whose frame register (per getFrameIndexReference) is a high GPR other than
// SP, and false if no such operand exists.
// NOTE(review): listing line 2523 (the start of the signature, including the
// function name) is not visible in this listing. Confirm against the upstream
// file.
2524 const TargetFrameLowering &TFI) {
2525 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2526 if (!AFI->isThumb1OnlyFunction())
2527 return false;
2528
2529 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
2530 for (const auto &MBB : MF)
2531 for (const auto &MI : MBB)
// Candidates: tSTRspi / tSTRi stores, or every instruction when execute-only
// code generation is enabled.
2532 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
2533 STI.genExecuteOnly())
2534 for (const auto &Op : MI.operands())
2535 if (Op.isFI()) {
2536 Register Reg;
2537 TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
2538 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2539 return true;
2540 }
2541 return false;
2542}
2543
2545 BitVector &SavedRegs,
2546 RegScavenger *RS) const {
2548 // This tells PEI to spill the FP as if it is any other callee-save register
2549 // to take advantage of the eliminateFrameIndex machinery. This also ensures it
2550 // is spilled in the order specified by getCalleeSavedRegs() to make it easier
2551 // to combine multiple loads / stores.
2552 bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF)) &&
2554 bool CS1Spilled = false;
2555 bool LRSpilled = false;
2556 unsigned NumGPRSpills = 0;
2557 unsigned NumFPRSpills = 0;
2558 SmallVector<unsigned, 4> UnspilledCS1GPRs;
2559 SmallVector<unsigned, 4> UnspilledCS2GPRs;
2560 const Function &F = MF.getFunction();
2561 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
2562 MF.getSubtarget().getRegisterInfo());
2563 const ARMBaseInstrInfo &TII =
2564 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2566 MachineFrameInfo &MFI = MF.getFrameInfo();
2567 MachineRegisterInfo &MRI = MF.getRegInfo();
2569 (void)TRI; // Silence unused warning in non-assert builds.
2570 Register FramePtr = STI.getFramePointerReg();
2572 STI.getPushPopSplitVariation(MF);
2573
2574 // For a floating point interrupt, save these registers always, since LLVM
2575 // currently doesn't model reads/writes to these registers.
2576 if (F.hasFnAttribute("interrupt") && F.hasFnAttribute("save-fp")) {
2577 SavedRegs.set(ARM::FPSCR);
2578 SavedRegs.set(ARM::R4);
2579
2580 // This register will only be present on non-MClass targets.
2581 if (STI.isMClass()) {
2582 SavedRegs.reset(ARM::FPEXC);
2583 } else {
2584 SavedRegs.set(ARM::FPEXC);
2585 SavedRegs.set(ARM::R5);
2586 }
2587 }
2588
2589 // Spill R4 if Thumb2 function requires stack realignment - it will be used as
2590 // scratch register. Also spill R4 if Thumb2 function has varsized objects,
2591 // since it's not always possible to restore sp from fp in a single
2592 // instruction.
2593 // FIXME: It will be better just to find spare register here.
2594 if (AFI->isThumb2Function() &&
2595 (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
2596 SavedRegs.set(ARM::R4);
2597
2598 // If a stack probe will be emitted, spill R4 and LR, since they are
2599 // clobbered by the stack probe call.
2600 // This estimate should be a safe, conservative estimate. The actual
2601 // stack probe is enabled based on the size of the local objects;
2602 // this estimate also includes the varargs store size.
2603 if (STI.isTargetWindows() &&
2604 WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
2605 SavedRegs.set(ARM::R4);
2606 SavedRegs.set(ARM::LR);
2607 }
2608
2609 if (AFI->isThumb1OnlyFunction()) {
2610 // Spill LR if Thumb1 function uses variable length argument lists.
2611 if (AFI->getArgRegsSaveSize() > 0)
2612 SavedRegs.set(ARM::LR);
2613
2614 // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
2615 // requires stack alignment. We don't know for sure what the stack size
2616 // will be, but for this, an estimate is good enough. If anything
2617 // changes it, it'll be a spill, which implies we've used all the registers
2618 // and so R4 is already used, so not marking it here will be OK.
2619 // FIXME: It will be better just to find spare register here.
2620 if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
2621 MFI.estimateStackSize(MF) > 508)
2622 SavedRegs.set(ARM::R4);
2623 }
2624
2625 // See if we can spill vector registers to aligned stack.
2626 checkNumAlignedDPRCS2Regs(MF, SavedRegs);
2627
2628 // Spill the BasePtr if it's used.
2629 if (RegInfo->hasBasePointer(MF))
2630 SavedRegs.set(RegInfo->getBaseRegister());
2631
2632 // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
2633 if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
2634 CanEliminateFrame = false;
2635
2636 // When return address signing is enabled R12 is treated as callee-saved.
2637 if (AFI->shouldSignReturnAddress())
2638 CanEliminateFrame = false;
2639
2640 // Don't spill FP if the frame can be eliminated. This is determined
2641 // by scanning the callee-save registers to see if any is modified.
2642 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2643 for (unsigned i = 0; CSRegs[i]; ++i) {
2644 unsigned Reg = CSRegs[i];
2645 bool Spilled = false;
2646 if (SavedRegs.test(Reg)) {
2647 Spilled = true;
2648 CanEliminateFrame = false;
2649 }
2650
2651 if (!ARM::GPRRegClass.contains(Reg)) {
2652 if (Spilled) {
2653 if (ARM::SPRRegClass.contains(Reg))
2654 NumFPRSpills++;
2655 else if (ARM::DPRRegClass.contains(Reg))
2656 NumFPRSpills += 2;
2657 else if (ARM::QPRRegClass.contains(Reg))
2658 NumFPRSpills += 4;
2659 }
2660 continue;
2661 }
2662
2663 if (Spilled) {
2664 NumGPRSpills++;
2665
2666 if (PushPopSplit != ARMSubtarget::SplitR7) {
2667 if (Reg == ARM::LR)
2668 LRSpilled = true;
2669 CS1Spilled = true;
2670 continue;
2671 }
2672
2673 // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
2674 switch (Reg) {
2675 case ARM::LR:
2676 LRSpilled = true;
2677 [[fallthrough]];
2678 case ARM::R0: case ARM::R1:
2679 case ARM::R2: case ARM::R3:
2680 case ARM::R4: case ARM::R5:
2681 case ARM::R6: case ARM::R7:
2682 CS1Spilled = true;
2683 break;
2684 default:
2685 break;
2686 }
2687 } else {
2688 if (PushPopSplit != ARMSubtarget::SplitR7) {
2689 UnspilledCS1GPRs.push_back(Reg);
2690 continue;
2691 }
2692
2693 switch (Reg) {
2694 case ARM::R0: case ARM::R1:
2695 case ARM::R2: case ARM::R3:
2696 case ARM::R4: case ARM::R5:
2697 case ARM::R6: case ARM::R7:
2698 case ARM::LR:
2699 UnspilledCS1GPRs.push_back(Reg);
2700 break;
2701 default:
2702 UnspilledCS2GPRs.push_back(Reg);
2703 break;
2704 }
2705 }
2706 }
2707
2708 bool ForceLRSpill = false;
2709 if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
2710 unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
2711 // Force LR to be spilled if the Thumb function size is >= 2048. This enables
2712 // use of BL to implement far jump.
2713 if (FnSize >= (1 << 11)) {
2714 CanEliminateFrame = false;
2715 ForceLRSpill = true;
2716 }
2717 }
2718
2719 // If any of the stack slot references may be out of range of an immediate
2720 // offset, make sure a register (or a spill slot) is available for the
2721 // register scavenger. Note that if we're indexing off the frame pointer, the
2722 // effective stack size is 4 bytes larger since the FP points to the stack
2723 // slot of the previous FP. Also, if we have variable sized objects in the
2724 // function, stack slot references will often be negative, and some of
2725 // our instructions are positive-offset only, so conservatively consider
2726 // that case to want a spill slot (or register) as well. Similarly, if
2727 // the function adjusts the stack pointer during execution and the
2728 // adjustments aren't already part of our stack size estimate, our offset
2729 // calculations may be off, so be conservative.
2730 // FIXME: We could add logic to be more precise about negative offsets
2731 // and which instructions will need a scratch register for them. Is it
2732 // worth the effort and added fragility?
2733 unsigned EstimatedStackSize =
2734 MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
2735
2736 // Determine biggest (positive) SP offset in MachineFrameInfo.
2737 int MaxFixedOffset = 0;
2738 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
2739 int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
2740 MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
2741 }
2742
2743 bool HasFP = hasFP(MF);
2744 if (HasFP) {
2745 if (AFI->hasStackFrame())
2746 EstimatedStackSize += 4;
2747 } else {
2748 // If FP is not used, SP will be used to access arguments, so count the
2749 // size of arguments into the estimation.
2750 EstimatedStackSize += MaxFixedOffset;
2751 }
2752 EstimatedStackSize += 16; // For possible paddings.
2753
2754 unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
2755 bool HasNonSPFrameIndex = false;
2756 if (AFI->isThumb1OnlyFunction()) {
2757 // For Thumb1, don't bother to iterate over the function. The only
2758 // instruction that requires an emergency spill slot is a store to a
2759 // frame index.
2760 //
2761 // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
2762 // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
2763 // a 5-bit unsigned immediate.
2764 //
2765 // We could try to check if the function actually contains a tSTRspi
2766 // that might need the spill slot, but it's not really important.
2767 // Functions with VLAs or extremely large call frames are rare, and
2768 // if a function is allocating more than 1KB of stack, an extra 4-byte
2769 // slot probably isn't relevant.
2770 //
2771 // A special case is the scenario where r11 is used as FP, where accesses
2772 // to a frame index will require its value to be moved into a low reg.
2773 // This is handled later on, once we are able to determine if we have any
2774 // fp-relative accesses.
2775 if (RegInfo->hasBasePointer(MF))
2776 EstimatedRSStackSizeLimit = (1U << 5) * 4;
2777 else
2778 EstimatedRSStackSizeLimit = (1U << 8) * 4;
2779 EstimatedRSFixedSizeLimit = (1U << 5) * 4;
2780 } else {
2781 EstimatedRSStackSizeLimit =
2782 estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
2783 EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
2784 }
2785 // Final estimate of whether sp or bp-relative accesses might require
2786 // scavenging.
2787 bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
2788
2789 // If the stack pointer moves and we don't have a base pointer, the
2790 // estimate logic doesn't work. The actual offsets might be larger when
2791 // we're constructing a call frame, or we might need to use negative
2792 // offsets from fp.
2793 bool HasMovingSP = MFI.hasVarSizedObjects() ||
2794 (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
2795 bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
2796
2797 // If we have a frame pointer, we assume arguments will be accessed
2798 // relative to the frame pointer. Check whether fp-relative accesses to
2799 // arguments require scavenging.
2800 //
2801 // We could do slightly better on Thumb1; in some cases, an sp-relative
2802 // offset would be legal even though an fp-relative offset is not.
2803 int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
2804 bool HasLargeArgumentList =
2805 HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
2806
2807 bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
2808 HasLargeArgumentList || HasNonSPFrameIndex;
2809 LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
2810 << "; EstimatedStack: " << EstimatedStackSize
2811 << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
2812 << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
2813 if (BigFrameOffsets ||
2814 !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
2815 AFI->setHasStackFrame(true);
2816
2817 // Save the FP if:
2818 // 1. We currently need it (HasFP), OR
2819 // 2. We might need it later due to stack realignment from aligned DPRCS2
2820 // saves (which will make hasFP() become true in emitPrologue).
2821 if (HasFP || (isFPReserved(MF) && AFI->getNumAlignedDPRCS2Regs() > 0)) {
2822 SavedRegs.set(FramePtr);
2823 // If the frame pointer is required by the ABI, also spill LR so that we
2824 // emit a complete frame record.
2825 if ((requiresAAPCSFrameRecord(MF) ||
2827 !LRSpilled) {
2828 SavedRegs.set(ARM::LR);
2829 LRSpilled = true;
2830 NumGPRSpills++;
2831 auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
2832 if (LRPos != UnspilledCS1GPRs.end())
2833 UnspilledCS1GPRs.erase(LRPos);
2834 }
2835 auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
2836 if (FPPos != UnspilledCS1GPRs.end())
2837 UnspilledCS1GPRs.erase(FPPos);
2838 NumGPRSpills++;
2839 if (FramePtr == ARM::R7)
2840 CS1Spilled = true;
2841 }
2842
2843 // This is the number of extra spills inserted for callee-save GPRs which
2844 // would not otherwise be used by the function. When greater than zero it
2845 // guarantees that it is possible to scavenge a register to hold the
2846 // address of a stack slot. On Thumb1, the register must be a valid operand
2847 // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
2848 // or lr.
2849 //
2850 // If we don't insert a spill, we instead allocate an emergency spill
2851 // slot, which can be used by scavenging to spill an arbitrary register.
2852 //
2853 // We currently don't try to figure out whether any specific instruction
2854 // requires scavenging an additional register.
2855 unsigned NumExtraCSSpill = 0;
2856
2857 if (AFI->isThumb1OnlyFunction()) {
2858 // For Thumb1-only targets, we need some low registers when we save and
2859 // restore the high registers (which aren't allocatable, but could be
2860 // used by inline assembly) because the push/pop instructions can not
2861 // access high registers. If necessary, we might need to push more low
2862 // registers to ensure that there is at least one free that can be used
2863 // for the saving & restoring, and preferably we should ensure that as
2864 // many as are needed are available so that fewer push/pop instructions
2865 // are required.
2866
2867 // Low registers which are not currently pushed, but could be (r4-r7).
2868 SmallVector<unsigned, 4> AvailableRegs;
2869
2870 // Unused argument registers (r0-r3) can be clobbered in the prologue for
2871 // free.
2872 int EntryRegDeficit = 0;
2873 for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
2874 if (!MF.getRegInfo().isLiveIn(Reg)) {
2875 --EntryRegDeficit;
2877 << printReg(Reg, TRI)
2878 << " is unused argument register, EntryRegDeficit = "
2879 << EntryRegDeficit << "\n");
2880 }
2881 }
2882
2883 // Unused return registers can be clobbered in the epilogue for free.
2884 int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
2886 << " return regs used, ExitRegDeficit = "
2887 << ExitRegDeficit << "\n");
2888
2889 int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
2890 LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
2891
2892 // r4-r6 can be used in the prologue if they are pushed by the first push
2893 // instruction.
2894 for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
2895 if (SavedRegs.test(Reg)) {
2896 --RegDeficit;
2897 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2898 << " is saved low register, RegDeficit = "
2899 << RegDeficit << "\n");
2900 } else {
2901 AvailableRegs.push_back(Reg);
2902 LLVM_DEBUG(
2903 dbgs()
2904 << printReg(Reg, TRI)
2905 << " is non-saved low register, adding to AvailableRegs\n");
2906 }
2907 }
2908
2909 // r7 can be used if it is not being used as the frame pointer.
2910 if (!HasFP || FramePtr != ARM::R7) {
2911 if (SavedRegs.test(ARM::R7)) {
2912 --RegDeficit;
2913 LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
2914 << RegDeficit << "\n");
2915 } else {
2916 AvailableRegs.push_back(ARM::R7);
2917 LLVM_DEBUG(
2918 dbgs()
2919 << "%r7 is non-saved low register, adding to AvailableRegs\n");
2920 }
2921 }
2922
2923 // Each of r8-r11 needs to be copied to a low register, then pushed.
2924 for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
2925 if (SavedRegs.test(Reg)) {
2926 ++RegDeficit;
2927 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2928 << " is saved high register, RegDeficit = "
2929 << RegDeficit << "\n");
2930 }
2931 }
2932
2933 // LR can only be used by PUSH, not POP, and can't be used at all if the
2934 // llvm.returnaddress intrinsic is used. This is only worth doing if we
2935 // are more limited at function entry than exit.
2936 if ((EntryRegDeficit > ExitRegDeficit) &&
2937 !(MF.getRegInfo().isLiveIn(ARM::LR) &&
2939 if (SavedRegs.test(ARM::LR)) {
2940 --RegDeficit;
2941 LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
2942 << RegDeficit << "\n");
2943 } else {
2944 AvailableRegs.push_back(ARM::LR);
2945 LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
2946 }
2947 }
2948
2949 // If there are more high registers that need pushing than low registers
2950 // available, push some more low registers so that we can use fewer push
2951 // instructions. This might not reduce RegDeficit all the way to zero,
2952 // because we can only guarantee that r4-r6 are available, but r8-r11 may
2953 // need saving.
2954 LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2955 for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2956 unsigned Reg = AvailableRegs.pop_back_val();
2957 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2958 << " to make up reg deficit\n");
2959 SavedRegs.set(Reg);
2960 NumGPRSpills++;
2961 CS1Spilled = true;
2962 assert(!MRI.isReserved(Reg) && "Should not be reserved");
2963 if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
2964 NumExtraCSSpill++;
2965 UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
2966 if (Reg == ARM::LR)
2967 LRSpilled = true;
2968 }
2969 LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2970 << "\n");
2971 }
2972
2973 // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2974 // restore LR in that case.
2975 bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2976
2977 // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
2978 // spill LR as well so we can fold BX_RET into the register restore (LDM).
2979 if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2980 SavedRegs.set(ARM::LR);
2981 NumGPRSpills++;
2983 LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2984 if (LRPos != UnspilledCS1GPRs.end())
2985 UnspilledCS1GPRs.erase(LRPos);
2986
2987 ForceLRSpill = false;
2988 if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2989 !AFI->isThumb1OnlyFunction())
2990 NumExtraCSSpill++;
2991 }
2992
2993 // If stack and double are 8-byte aligned and we are spilling an odd number
2994 // of GPRs, spill one extra callee save GPR so we won't have to pad between
2995 // the integer and double callee save areas.
2996 LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2997 const Align TargetAlign = getStackAlign();
2998 if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
2999 if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
3000 for (unsigned Reg : UnspilledCS1GPRs) {
3001 // Don't spill high register if the function is thumb. In the case of
3002 // Windows on ARM, accept R11 (frame pointer)
3003 if (!AFI->isThumbFunction() ||
3004 (STI.isTargetWindows() && Reg == ARM::R11) ||
3005 isARMLowRegister(Reg) ||
3006 (Reg == ARM::LR && !ExpensiveLRRestore)) {
3007 SavedRegs.set(Reg);
3008 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
3009 << " to make up alignment\n");
3010 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
3011 !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
3012 NumExtraCSSpill++;
3013 break;
3014 }
3015 }
3016 } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
3017 unsigned Reg = UnspilledCS2GPRs.front();
3018 SavedRegs.set(Reg);
3019 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
3020 << " to make up alignment\n");
3021 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
3022 NumExtraCSSpill++;
3023 }
3024 }
3025
3026 // Estimate if we might need to scavenge registers at some point in order
3027 // to materialize a stack offset. If so, either spill one additional
3028 // callee-saved register or reserve a special spill slot to facilitate
3029 // register scavenging. Thumb1 needs a spill slot for stack pointer
3030 // adjustments and for frame index accesses when FP is high register,
3031 // even when the frame itself is small.
3032 unsigned RegsNeeded = 0;
3033 if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {
3034 RegsNeeded++;
3035 // With thumb1 execute-only we may need an additional register for saving
3036 // and restoring the CPSR.
3037 if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
3038 RegsNeeded++;
3039 }
3040
3041 if (RegsNeeded > NumExtraCSSpill) {
3042 // If any non-reserved CS register isn't spilled, just spill one or two
3043 // extra. That should take care of it!
3044 unsigned NumExtras = TargetAlign.value() / 4;
3046 while (NumExtras && !UnspilledCS1GPRs.empty()) {
3047 unsigned Reg = UnspilledCS1GPRs.pop_back_val();
3048 if (!MRI.isReserved(Reg) &&
3049 (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
3050 Extras.push_back(Reg);
3051 NumExtras--;
3052 }
3053 }
3054 // For non-Thumb1 functions, also check for hi-reg CS registers
3055 if (!AFI->isThumb1OnlyFunction()) {
3056 while (NumExtras && !UnspilledCS2GPRs.empty()) {
3057 unsigned Reg = UnspilledCS2GPRs.pop_back_val();
3058 if (!MRI.isReserved(Reg)) {
3059 Extras.push_back(Reg);
3060 NumExtras--;
3061 }
3062 }
3063 }
3064 if (NumExtras == 0) {
3065 for (unsigned Reg : Extras) {
3066 SavedRegs.set(Reg);
3067 if (!MRI.isPhysRegUsed(Reg))
3068 NumExtraCSSpill++;
3069 }
3070 }
3071 while ((RegsNeeded > NumExtraCSSpill) && RS) {
3072 // Reserve a slot closest to SP or frame pointer.
3073 LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
3074 const TargetRegisterClass &RC = ARM::GPRRegClass;
3075 unsigned Size = TRI->getSpillSize(RC);
3076 Align Alignment = TRI->getSpillAlign(RC);
3077 RS->addScavengingFrameIndex(
3078 MFI.CreateSpillStackObject(Size, Alignment));
3079 --RegsNeeded;
3080 }
3081 }
3082 }
3083
3084 if (ForceLRSpill)
3085 SavedRegs.set(ARM::LR);
3086 AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
3087}
3088
// NOTE(review): the line carrying this function's signature (listing line
// 3089) is absent from this extract; only the body is visible.
//
// Body summary: when the callee-saved info is valid, locate the LR entry and
// clear its Restored flag if every return terminator in the function is one of
// LDMIA_RET, t2LDMIA_RET or tPOP_RET.
3090 MachineFrameInfo &MFI = MF.getFrameInfo();
// Nothing to update until the callee-saved info has been computed.
3091 if (!MFI.isCalleeSavedInfoValid())
3092 return;
3093
3094 // Check if all terminators do not implicitly use LR. Then we can 'restore' LR
3095 // into PC so it is not live out of the return block: Clear the Restored bit
3096 // in that case.
3097 for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
// Only the LR entry is of interest; skip every other callee-saved register.
3098 if (Info.getReg() != ARM::LR)
3099 continue;
// Outer all_of walks every block of MF, inner all_of walks that block's
// terminators. Non-return terminators always pass; a return passes only if
// its opcode is one of the three listed below.
3100 if (all_of(MF, [](const MachineBasicBlock &MBB) {
3101 return all_of(MBB.terminators(), [](const MachineInstr &Term) {
3102 return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
3103 Term.getOpcode() == ARM::t2LDMIA_RET ||
3104 Term.getOpcode() == ARM::tPOP_RET;
3105 });
3106 })) {
3107 Info.setRestored(false);
// The LR entry has been handled; stop scanning.
3108 break;
3109 }
3110 }
3111}
3112
3118
// NOTE(review): listing lines 3119 (start of the signature) and 3121 (first
// statement of the body) are absent from this extract.
3120 BitVector &SavedRegs) const {
3122
3123 // If we have the "returned" parameter attribute which guarantees that we
3124 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
3125 // record that fact for IPRA.
3126 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// Mark R0 in SavedRegs when the function info reports that R0 is preserved.
3127 if (AFI->getPreservesR0())
3128 SavedRegs.set(ARM::R0);
3129}
3130
// NOTE(review): the start of this signature (listing lines 3131-3132), the
// second half of the first `if` condition (3137) and the `case` labels of the
// switch (3152, 3163, 3168-3169) are absent from this extract.
//
// Visible behaviour: optionally appends FPCXTNS to CSI (marked not restored),
// optionally inserts R12 into CSI at a position chosen by the push/pop split
// variation, and always returns false.
3133 std::vector<CalleeSavedInfo> &CSI) const {
3134 // For CMSE entry functions, handle floating-point context as if it was a
3135 // callee-saved register.
3136 if (STI.hasV8_1MMainlineOps() &&
3138 CSI.emplace_back(ARM::FPCXTNS);
// FPCXTNS is appended as a CSI entry but flagged as not restored.
3139 CSI.back().setRestored(false);
3140 }
3141
3142 // For functions, which sign their return address, upon function entry, the
3143 // return address PAC is computed in R12. Treat R12 as a callee-saved register
3144 // in this case.
3145 const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
3146 if (AFI.shouldSignReturnAddress()) {
3147 // The order of register must match the order we push them, because the
3148 // PEI assigns frame indices in that order. That order depends on the
3149 // PushPopSplitVariation, there are only two cases which we use with return
3150 // address signing:
// NOTE(review): three non-default arms (three `break`s) are visible below, so
// "only two cases" above may be stale -- confirm against the case labels.
3151 switch (STI.getPushPopSplitVariation(MF)) {
3153 // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
// Insert R12 in front of the first entry that is R8-R11 or a D register
// (or at the end of CSI when there is no such entry).
3154 CSI.insert(find_if(CSI,
3155 [=](const auto &CS) {
3156 MCRegister Reg = CS.getReg();
3157 return Reg == ARM::R10 || Reg == ARM::R11 ||
3158 Reg == ARM::R8 || Reg == ARM::R9 ||
3159 ARM::DPRRegClass.contains(Reg);
3160 }),
3161 CalleeSavedInfo(ARM::R12));
3162 break;
3164 // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
3165 // on the stack.
3166 CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
3167 break;
3170 "ABI-required frame pointers need a CSR split when signing return "
3171 "address.");
// Insert R12 in front of the first entry that is not LR.
3172 CSI.insert(find_if(CSI,
3173 [=](const auto &CS) {
3174 MCRegister Reg = CS.getReg();
3175 return Reg != ARM::LR;
3176 }),
3177 CalleeSavedInfo(ARM::R12));
3178 break;
3179 default:
3180 llvm_unreachable("Unexpected CSR split with return address signing");
3181 }
3182 }
3183
// Always reports false after adjusting CSI.
3184 return false;
3185}
3186
// NOTE(review): the signature (listing lines 3187-3188) is absent from this
// extract.
//
// Returns a one-entry static table placing FPCXTNS at offset -4 and writes the
// table length to NumEntries.
3189 static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
3190 NumEntries = std::size(FixedSpillOffsets);
3191 return FixedSpillOffsets;
3192}
3193
// Erases the call-frame pseudo instruction at I and returns the iterator
// produced by MBB.erase(I). Before erasing, an explicit SP update may be
// emitted:
//  * without a reserved call frame: SP is moved by the aligned frame size
//    (down for ADJCALLSTACKDOWN, up for ADJCALLSTACKUP), unless this is a
//    destroy pseudo whose callee-pop amount is not -1U;
//  * with a reserved call frame: SP is moved by -CalleePopAmount when that
//    amount is not -1U.
// Thumb1-only functions are rejected by the assert below.
// NOTE(review): the parameter list (listing lines 3195-3196) and the line that
// declares AFI (3199) are absent from this extract.
3194MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
3197 const ARMBaseInstrInfo &TII =
3198 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
3200 bool isARM = !AFI->isThumbFunction();
3201 DebugLoc dl = I->getDebugLoc();
3202 unsigned Opc = I->getOpcode();
3203 bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
// Operand 1 is only read for the destroy pseudo; 0 is used otherwise.
3204 unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
3205
3206 assert(!AFI->isThumb1OnlyFunction() &&
3207 "This eliminateCallFramePseudoInstr does not support Thumb1!");
3208
// Carry the pseudo's predicate (AL when it has none) over to the SP update.
3209 int PIdx = I->findFirstPredOperandIdx();
3210 ARMCC::CondCodes Pred = (PIdx == -1)
3211 ? ARMCC::AL
3212 : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
3213 unsigned PredReg = TII.getFramePred(*I);
3214
3215 if (!hasReservedCallFrame(MF)) {
3216 // Bail early if the callee is expected to do the adjustment.
3217 if (IsDestroy && CalleePopAmount != -1U)
3218 return MBB.erase(I);
3219
3220 // If we have alloca, convert as follows:
3221 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
3222 // ADJCALLSTACKUP -> add, sp, sp, amount
3223 unsigned Amount = TII.getFrameSize(*I);
3224 if (Amount != 0) {
3225 // We need to keep the stack aligned properly. To do this, we round the
3226 // amount of space needed for the outgoing arguments up to the next
3227 // alignment boundary.
3228 Amount = alignSPAdjust(Amount);
3229
3230 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
3231 emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
3232 Pred, PredReg);
3233 } else {
3234 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
3235 emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
3236 Pred, PredReg);
3237 }
3238 }
3239 } else if (CalleePopAmount != -1U) {
3240 // If the calling convention demands that the callee pops arguments from the
3241 // stack, we want to add it back if we have a reserved call frame.
3242 emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
3243 MachineInstr::NoFlags, Pred, PredReg);
3244 }
3245 return MBB.erase(I);
3246}
3247
3248 /// Get the minimum constant for ARM that is greater than or equal to the
3249 /// argument. In ARM, constants can have any value that can be produced by
3250 /// rotating an 8-bit value to the right by an even number of bits within a
3251 /// 32-bit word.
///
/// A zero argument yields zero.
// NOTE(review): the signature line (listing line 3252) is absent from this
// extract; the body reads and overwrites a parameter named Value.
3253 unsigned Shifted = 0;
3254
3255 if (Value == 0)
3256 return 0;
3257
// Normalise: shift left two bits at a time until one of the top two bits is
// set, remembering the total shift so it can be undone below.
3258 while (!(Value & 0xC0000000)) {
3259 Value = Value << 2;
3260 Shifted += 2;
3261 }
3262
// Keep the top byte and round it up by one if any of the low 24 bits were set.
3263 bool Carry = (Value & 0x00FFFFFF);
3264 Value = ((Value & 0xFF000000) >> 24) + Carry;
3265
// The carry can push the byte to 0x100; in that case keep only bits 2..8.
3266 if (Value & 0x0000100)
3267 Value = Value & 0x000001FC;
3268
// Move the rounded byte back to where the original value sat.
// NOTE(review): if Value is a 32-bit unsigned (type not visible here), an
// argument above 0xFF000000 gives Shifted == 0 and Value == 0x100, so the
// left shift by 24 would wrap to 0 -- confirm callers never pass such values.
3269 if (Shifted > 24)
3270 Value = Value >> (Shifted - 24);
3271 else
3272 Value = Value << (24 - Shifted);
3273
3274 return Value;
3275}
3276
3277// The stack limit in the TCB is set to this many bytes above the actual
3278// stack limit.
3280
3281 // Adjust the function prologue to enable split stacks. This currently only
3282 // supports android and linux.
3283 //
3284 // The ABI of the segmented stack prologue is a little arbitrarily chosen, but
3285 // must be well defined in order to allow for consistent implementations of the
3286 // __morestack helper function. The ABI is also not a normal ABI in that it
3287 // doesn't follow the normal calling conventions because this allows the
3288 // prologue of each function to be optimized further.
3289 //
3290 // Currently, the ABI looks like (when calling __morestack)
3291 //
3292 // * r4 holds the minimum stack size requested for this function call
3293 // * r5 holds the stack size of the arguments to the function
3294 // * the beginning of the function is 3 instructions after the call to
3295 // __morestack
3296 //
3297 // Implementations of __morestack should use r4 to allocate a new stack, r5 to
3298 // place the arguments on to the new stack, and the 3-instruction knowledge to
3299 // jump directly to the body of the function when working on the new stack.
3300 //
3301 // An old (and possibly no longer compatible) implementation of __morestack for
3302 // ARM can be found at [1].
3303 //
3304 // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
// NOTE(review): the first line of the signature (listing line 3305) and a
// number of interior lines are absent from this extract, including the
// declarations of ARMFI, McrMBB, GetMBB, AllocMBB, WalkList, MCP and NewCPV and
// the predicate/condition-code operands of several BuildMI chains.
3306 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3307 unsigned Opcode;
3308 const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
3309 bool Thumb = ST->isThumb();
3310 bool Thumb2 = ST->isThumb2();
3311
3312 // Sadly, this currently doesn't support varargs or platforms other than
3313 // android/linux. Note that thumb1/thumb2 are supported for android/linux.
3314 if (MF.getFunction().isVarArg())
3315 report_fatal_error("Segmented stacks do not support vararg functions.");
3316 if (!ST->isTargetAndroid() && !ST->isTargetLinux())
3317 report_fatal_error("Segmented stacks not supported on this platform.");
3318
3319 MachineFrameInfo &MFI = MF.getFrameInfo();
3320 const ARMBaseInstrInfo &TII =
3321 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
3323 DebugLoc DL;
3324
// Nothing to emit when the frame info says no split-stack prologue is needed.
3325 if (!MFI.needsSplitStackProlog())
3326 return;
3327
3328 uint64_t StackSize = MFI.getStackSize();
3329
3330 // Use R4 and R5 as scratch registers.
3331 // We save R4 and R5 before use and restore them before leaving the function.
3332 unsigned ScratchReg0 = ARM::R4;
3333 unsigned ScratchReg1 = ARM::R5;
3334 unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
3335 uint64_t AlignedStackSize;
3336
3337 MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
3338 MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
3342
3343 // Grab everything that reaches PrologueMBB to update their liveness as well.
3344 SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
3346 WalkList.push_back(&PrologueMBB);
3347
// Worklist walk over predecessors, transitively; the set insert doubles as
// the visited check.
3348 do {
3349 MachineBasicBlock *CurMBB = WalkList.pop_back_val();
3350 for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
3351 if (BeforePrologueRegion.insert(PredBB).second)
3352 WalkList.push_back(PredBB);
3353 }
3354 } while (!WalkList.empty());
3355
3356 // The order in that list is important.
3357 // The blocks will all be inserted before PrologueMBB using that order.
3358 // Therefore the block that should appear first in the CFG should appear
3359 // first in the list.
3360 MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
3361 PostStackMBB};
3362
3363 BeforePrologueRegion.insert_range(AddedBlocks);
3364
// Every live-in of PrologueMBB becomes a live-in of each block in the region,
// including the newly created blocks.
3365 for (const auto &LI : PrologueMBB.liveins()) {
3366 for (MachineBasicBlock *PredBB : BeforePrologueRegion)
3367 PredBB->addLiveIn(LI);
3368 }
3369
3370 // Remove the newly added blocks from the list, since we know
3371 // we do not have to do the following updates for them.
3372 for (MachineBasicBlock *B : AddedBlocks) {
3373 BeforePrologueRegion.erase(B);
3374 MF.insert(PrologueMBB.getIterator(), B);
3375 }
3376
3377 for (MachineBasicBlock *MBB : BeforePrologueRegion) {
3378 // Make sure the LiveIns are still sorted and unique.
3379 MBB->sortUniqueLiveIns();
3380 // Replace the edges to PrologueMBB by edges to the sequences
3381 // we are about to add, but only update for immediate predecessors.
3382 if (MBB->isSuccessor(&PrologueMBB))
3383 MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
3384 }
3385
3386 // The required stack size that is aligned to ARM constant criterion.
3387 AlignedStackSize = alignToARMConstant(StackSize);
3388
3389 // When the frame size is less than 256 we just compare the stack
3390 // boundary directly to the value of the stack pointer, per gcc.
3391 bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
3392
3393 // We will use two of the callee save registers as scratch registers so we
3394 // need to save those registers onto the stack.
3395 // We will use SR0 to hold stack limit and SR1 to hold the stack size
3396 // requested and arguments for __morestack().
3397 // SR0: Scratch Register #0
3398 // SR1: Scratch Register #1
3399 // push {SR0, SR1}
3400 if (Thumb) {
3401 BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
3403 .addReg(ScratchReg0)
3404 .addReg(ScratchReg1);
3405 } else {
3406 BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
3407 .addReg(ARM::SP, RegState::Define)
3408 .addReg(ARM::SP)
3410 .addReg(ScratchReg0)
3411 .addReg(ScratchReg1);
3412 }
3413
3414 // Emit the relevant DWARF information about the change in stack pointer as
3415 // well as where to find both r4 and r5 (the callee-save registers)
3416 if (!MF.getTarget().getMCAsmInfo().usesWindowsCFI()) {
3417 CFIInstBuilder CFIBuilder(PrevStackMBB, MachineInstr::NoFlags);
3418 CFIBuilder.buildDefCFAOffset(8);
3419 CFIBuilder.buildOffset(ScratchReg1, -4);
3420 CFIBuilder.buildOffset(ScratchReg0, -8);
3421 }
3422
3423 // mov SR1, sp
3424 if (Thumb) {
3425 BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
3426 .addReg(ARM::SP)
3428 } else if (CompareStackPointer) {
3429 BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
3430 .addReg(ARM::SP)
3432 .add(condCodeOp());
3433 }
3434
3435 // sub SR1, sp, #StackSize
3436 if (!CompareStackPointer && Thumb) {
3437 if (AlignedStackSize < 256) {
3438 BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
3439 .add(condCodeOp())
3440 .addReg(ScratchReg1)
3441 .addImm(AlignedStackSize)
3443 } else {
// The size does not fit the 8-bit immediate form: materialise it in SR0
// (move-immediate or constant-pool load) and subtract register from register.
3444 if (Thumb2 || ST->genExecuteOnly()) {
3445 BuildMI(McrMBB, DL, TII.get(MovOp), ScratchReg0)
3446 .addImm(AlignedStackSize);
3447 } else {
3448 auto MBBI = McrMBB->end();
3449 auto RegInfo = STI.getRegisterInfo();
3450 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3451 AlignedStackSize);
3452 }
3453 BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
3454 .add(condCodeOp())
3455 .addReg(ScratchReg1)
3456 .addReg(ScratchReg0)
3458 }
3459 } else if (!CompareStackPointer) {
3460 if (AlignedStackSize < 256) {
3461 BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
3462 .addReg(ARM::SP)
3463 .addImm(AlignedStackSize)
3465 .add(condCodeOp());
3466 } else {
3467 auto MBBI = McrMBB->end();
3468 auto RegInfo = STI.getRegisterInfo();
3469 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3470 AlignedStackSize);
3471 BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
3472 .addReg(ARM::SP)
3473 .addReg(ScratchReg0)
3475 .add(condCodeOp());
3476 }
3477 }
3478
// Load the stack limit into SR0. Thumb1-only targets read it through the
// external symbol "__STACK_LIMIT"; everything else reads it from a TLS slot.
3479 if (Thumb && ST->isThumb1Only()) {
3480 if (ST->genExecuteOnly()) {
3481 BuildMI(GetMBB, DL, TII.get(MovOp), ScratchReg0)
3482 .addExternalSymbol("__STACK_LIMIT");
3483 } else {
3484 unsigned PCLabelId = ARMFI->createPICLabelUId();
3486 MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
3488 unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));
3489
3490 // ldr SR0, [pc, offset(STACK_LIMIT)]
3491 BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
3494 }
3495
3496 // ldr SR0, [SR0]
3497 BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
3498 .addReg(ScratchReg0)
3499 .addImm(0)
3501 } else {
3502 // Get TLS base address from the coprocessor
3503 // mrc p15, #0, SR0, c13, c0, #3
3504 BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
3505 ScratchReg0)
3506 .addImm(15)
3507 .addImm(0)
3508 .addImm(13)
3509 .addImm(0)
3510 .addImm(3)
3512
3513 // Use the last tls slot on android and a private field of the TCB on linux.
3514 assert(ST->isTargetAndroid() || ST->isTargetLinux());
3515 unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
3516
3517 // Get the stack limit from the right offset
3518 // ldr SR0, [sr0, #4 * TlsOffset]
3519 BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
3520 ScratchReg0)
3521 .addReg(ScratchReg0)
3522 .addImm(4 * TlsOffset)
3524 }
3525
3526 // Compare stack limit with stack size requested.
3527 // cmp SR0, SR1
3528 Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
3529 BuildMI(GetMBB, DL, TII.get(Opcode))
3530 .addReg(ScratchReg0)
3531 .addReg(ScratchReg1)
3533
3534 // This jump is taken if StackLimit <= SP - stack required.
3535 Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
3536 BuildMI(GetMBB, DL, TII.get(Opcode))
3537 .addMBB(PostStackMBB)
3539 .addReg(ARM::CPSR);
3540
3541 // Calling __morestack(StackSize, Size of stack arguments).
3542 // __morestack knows that the stack size requested is in SR0(r4)
3543 // and the size of the stack arguments is in SR1(r5).
3544
3545 // Pass first argument for the __morestack by Scratch Register #0.
3546 // The amount of stack required
3547 if (Thumb) {
3548 if (AlignedStackSize < 256) {
3549 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
3550 .add(condCodeOp())
3551 .addImm(AlignedStackSize)
3553 } else {
3554 if (Thumb2 || ST->genExecuteOnly()) {
3555 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg0)
3556 .addImm(AlignedStackSize);
3557 } else {
3558 auto MBBI = AllocMBB->end();
3559 auto RegInfo = STI.getRegisterInfo();
3560 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3561 AlignedStackSize);
3562 }
3563 }
3564 } else {
3565 if (AlignedStackSize < 256) {
3566 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
3567 .addImm(AlignedStackSize)
3569 .add(condCodeOp());
3570 } else {
3571 auto MBBI = AllocMBB->end();
3572 auto RegInfo = STI.getRegisterInfo();
3573 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3574 AlignedStackSize);
3575 }
3576 }
3577
3578 // Pass second argument for the __morestack by Scratch Register #1.
3579 // The amount of stack consumed by the function arguments.
3580 if (Thumb) {
3581 if (ARMFI->getArgumentStackSize() < 256) {
3582 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
3583 .add(condCodeOp())
3586 } else {
3587 if (Thumb2 || ST->genExecuteOnly()) {
3588 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg1)
3590 } else {
3591 auto MBBI = AllocMBB->end();
3592 auto RegInfo = STI.getRegisterInfo();
3593 RegInfo->emitLoadConstPool(
3594 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3596 }
3597 }
3598 } else {
3599 if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
3600 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
3603 .add(condCodeOp());
3604 } else {
3605 auto MBBI = AllocMBB->end();
3606 auto RegInfo = STI.getRegisterInfo();
3607 RegInfo->emitLoadConstPool(
3608 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3610 }
3611 }
3612
3613 // push {lr} - Save return address of this function.
3614 if (Thumb) {
3615 BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
3617 .addReg(ARM::LR);
3618 } else {
3619 BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
3620 .addReg(ARM::SP, RegState::Define)
3621 .addReg(ARM::SP)
3623 .addReg(ARM::LR);
3624 }
3625
3626 // Emit the DWARF info about the change in stack as well as where to find the
3627 // previous link register
3628 if (!MF.getTarget().getMCAsmInfo().usesWindowsCFI()) {
3629 CFIInstBuilder CFIBuilder(AllocMBB, MachineInstr::NoFlags);
3630 CFIBuilder.buildDefCFAOffset(12);
3631 CFIBuilder.buildOffset(ARM::LR, -12);
3632 }
3633
3634 // Call __morestack().
3635 if (Thumb) {
3636 BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
3638 .addExternalSymbol("__morestack");
3639 } else {
3640 BuildMI(AllocMBB, DL, TII.get(ARM::BL))
3641 .addExternalSymbol("__morestack");
3642 }
3643
3644 // pop {lr} - Restore return address of this original function.
3645 if (Thumb) {
3646 if (ST->isThumb1Only()) {
// Thumb1-only path: pop into SR0 first, then move SR0 into LR.
3647 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3649 .addReg(ScratchReg0);
3650 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
3651 .addReg(ScratchReg0)
3653 } else {
3654 BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
3655 .addReg(ARM::LR, RegState::Define)
3656 .addReg(ARM::SP, RegState::Define)
3657 .addReg(ARM::SP)
3658 .addImm(4)
3660 }
3661 } else {
3662 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3663 .addReg(ARM::SP, RegState::Define)
3664 .addReg(ARM::SP)
3666 .addReg(ARM::LR);
3667 }
3668
3669 // Restore SR0 and SR1 in case __morestack() was called.
3670 // __morestack() will skip PostStackMBB block so we need to restore
3671 // scratch registers from here.
3672 // pop {SR0, SR1}
3673 if (Thumb) {
3674 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3676 .addReg(ScratchReg0)
3677 .addReg(ScratchReg1);
3678 } else {
3679 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3680 .addReg(ARM::SP, RegState::Define)
3681 .addReg(ARM::SP)
3683 .addReg(ScratchReg0)
3684 .addReg(ScratchReg1);
3685 }
3686
3687 // Update the CFA offset now that we've popped
3690
3691 // Return from this function.
3692 BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
3693
3694 // Restore SR0 and SR1 in case __morestack() was not called.
3695 // pop {SR0, SR1}
3696 if (Thumb) {
3697 BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
3699 .addReg(ScratchReg0)
3700 .addReg(ScratchReg1);
3701 } else {
3702 BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
3703 .addReg(ARM::SP, RegState::Define)
3704 .addReg(ARM::SP)
3706 .addReg(ScratchReg0)
3707 .addReg(ScratchReg1);
3708 }
3709
3710 // Update the CFA offset now that we've popped
3711 if (!MF.getTarget().getMCAsmInfo().usesWindowsCFI()) {
3712 CFIInstBuilder CFIBuilder(PostStackMBB, MachineInstr::NoFlags);
3713 CFIBuilder.buildDefCFAOffset(0);
3714
3715 // Tell debuggers that r4 and r5 are now the same as they were in the
3716 // previous function, that they're the "Same Value".
3717 CFIBuilder.buildSameValue(ScratchReg0);
3718 CFIBuilder.buildSameValue(ScratchReg1);
3719 }
3720
3721 // Organizing MBB lists
// Resulting edges: PrevStackMBB -> McrMBB -> GetMBB -> {PostStackMBB, AllocMBB},
// AllocMBB -> PostStackMBB, PostStackMBB -> PrologueMBB.
3722 PostStackMBB->addSuccessor(&PrologueMBB);
3723
3724 AllocMBB->addSuccessor(PostStackMBB);
3725
3726 GetMBB->addSuccessor(PostStackMBB);
3727 GetMBB->addSuccessor(AllocMBB);
3728
3729 McrMBB->addSuccessor(GetMBB);
3730
3731 PrevStackMBB->addSuccessor(McrMBB);
3732
3733#ifdef EXPENSIVE_CHECKS
3734 MF.verify();
3735#endif
3736}
static unsigned estimateRSStackSizeLimit(MachineFunction &MF)
Look at each instruction that references stack frames and return the stack size limit beyond which so...
static bool needsWinCFI(const MachineFunction *MF)
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isThumb(const MCSubtargetInfo &STI)
static MachineBasicBlock::iterator skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs)
Skip past the code inserted by emitAlignedDPRCS2Spills, and return an iterator to the following instr...
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, const TargetInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const unsigned Reg, const Align Alignment, const bool MustBeSingleInstruction)
Emit an instruction sequence that will align the address in register Reg by zero-ing out the lower bi...
static uint32_t alignToARMConstant(uint32_t Value)
Get the minimum constant for ARM that is greater than or equal to the argument.
static void checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs)
static void insertSEHRange(MachineBasicBlock &MBB, MachineBasicBlock::iterator Start, const MachineBasicBlock::iterator &End, const ARMBaseInstrInfo &TII, unsigned MIFlags)
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static int getArgumentStackToRestore(MachineFunction &MF, MachineBasicBlock &MBB)
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF, const ARMBaseInstrInfo &TII)
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI, const TargetInstrInfo &TII, unsigned Flags)
SpillArea getSpillArea(Register Reg, ARMSubtarget::PushPopSplitVariation Variation, unsigned NumAlignedDPRCS2Regs, const ARMBaseRegisterInfo *RegInfo)
Get the spill area that Reg should be saved into in the prologue.
static bool canSpillOnFrameIndexAccess(const MachineFunction &MF, const TargetFrameLowering &TFI)
static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes)
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI, const MachineFunction &MF)
We need the offset of the frame pointer relative to other MachineFrameInfo offsets which are encoded ...
static MachineBasicBlock::iterator initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI)
static int sizeOfSPAdjustment(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
static cl::opt< bool > SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), cl::desc("Align ARM NEON spills in prolog and epilog"))
static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg, unsigned SrcReg, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
Value * RHS
Value * LHS
static const unsigned FramePtr
bool canRealignStack(const MachineFunction &MF) const override
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
StackOffset getNonLocalFrameIndexReference(const MachineFunction &MF, int FI) const override
getNonLocalFrameIndexReference - This method returns the offset used to reference a frame index locat...
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - Provide a base+offset reference to an FI slot for debug info.
bool keepFramePointer(const MachineFunction &MF) const
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
ARMFrameLowering(const ARMSubtarget &sti)
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool requiresAAPCSFrameRecord(const MachineFunction &MF) const
bool isFPReserved(const MachineFunction &MF) const
isFPReserved - Return true if the frame pointer register should be considered a reserved register on ...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override
Adjust the prologue to have the function use segmented stacks.
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override
getCalleeSavedSpillSlots - This method returns a pointer to an array of pairs, that contains an entry...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
bool hasFPImpl(const MachineFunction &MF) const override
hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.
int ResolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, int SPAdj) const
void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const override
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
const ARMSubtarget & STI
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
bool enableCalleeSaveSkip(const MachineFunction &MF) const override
Returns true if the target can safely skip saving callee-saved registers for noreturn nounwind functi...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
unsigned getFPCXTSaveAreaSize() const
unsigned getGPRCalleeSavedArea1Size() const
unsigned getDPRCalleeSavedGapSize() const
unsigned getDPRCalleeSavedArea1Size() const
void setDPRCalleeSavedArea1Offset(unsigned o)
void setGPRCalleeSavedArea2Size(unsigned s)
void setFramePtrSpillOffset(unsigned o)
unsigned getGPRCalleeSavedArea2Size() const
unsigned getNumAlignedDPRCS2Regs() const
void setGPRCalleeSavedArea1Size(unsigned s)
unsigned getArgumentStackToRestore() const
void setFPCXTSaveAreaSize(unsigned s)
unsigned getGPRCalleeSavedArea3Size() const
unsigned getFramePtrSpillOffset() const
unsigned getArgRegsSaveSize() const
void setGPRCalleeSavedArea2Offset(unsigned o)
void setGPRCalleeSavedArea1Offset(unsigned o)
void setDPRCalleeSavedArea1Size(unsigned s)
void setDPRCalleeSavedGapSize(unsigned s)
void setFPStatusSavesSize(unsigned s)
unsigned getArgumentStackSize() const
unsigned getReturnRegsCount() const
void setGPRCalleeSavedArea3Size(unsigned s)
unsigned getFPStatusSavesSize() const
const ARMBaseRegisterInfo * getRegisterInfo() const override
enum PushPopSplitVariation getPushPopSplitVariation(const MachineFunction &MF) const
PushPopSplitVariation
How the push and pop instructions of callee saved general-purpose registers should be split.
@ SplitR11WindowsSEH
When the stack frame size is not known (because of variable-sized objects or realignment),...
@ SplitR7
R7 and LR must be adjacent, because R7 is the frame pointer, and must point to a frame record consist...
@ SplitR11AAPCSSignRA
When generating AAPCS-compliant frame chains, R11 is the frame pointer, and must be pushed adjacent t...
@ NoSplit
All GPRs can be pushed in a single instruction.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
bool test(unsigned Idx) const
Returns true if bit Idx is set.
Definition BitVector.h:482
BitVector & reset()
Reset all bits in the bitvector.
Definition BitVector.h:409
BitVector & set()
Set all bits in the bitvector.
Definition BitVector.h:366
Helper class for creating CFI instructions and inserting them into MIR.
void buildDefCFAOffset(int64_t Offset, MCSymbol *Label=nullptr) const
void buildDefCFARegister(MCRegister Reg) const
void buildSameValue(MCRegister Reg) const
void buildOffset(MCRegister Reg, int64_t Offset) const
void buildDefCFA(MCRegister Reg, int64_t Offset) const
The CalleeSavedInfo class tracks the information needed to locate where a callee saved register is in t...
A debug info location.
Definition DebugLoc.h:123
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:229
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
bool usesWindowsCFI() const
Definition MCAsmInfo.h:678
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
iterator_range< livein_iterator > liveins() const
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool isReturnAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
int getStackProtectorIndex() const
Return the index for the stack protector object.
int64_t getOffsetAdjustment() const
Return the correction for frame offsets.
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
LLVM_ABI BitVector getPristineRegs(const MachineFunction &MF) const
Return a set of physical registers that are pristine.
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
Representation of each machine instruction.
LLVM_ABI bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
LLVM_ABI bool isLiveIn(Register Reg) const
LLVM_ABI bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
Wrapper class representing virtual and physical registers.
Definition Register.h:20
bool erase(PtrType Ptr)
Remove pointer from the set.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
virtual void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
TargetFrameLowering(StackDirection D, Align StackAl, int LAO, Align TransAl=Align(1), bool StackReal=true)
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
int alignSPAdjust(int SPAdj) const
alignSPAdjust - This method aligns the stack adjustment to the correct alignment.
virtual StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const
getFrameIndexReference - This method should return the base register and offset used to reference a f...
TargetInstrInfo - Interface to description of machine instruction set.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
static StringRef getLibcallImplName(RTLIB::LibcallImpl Call)
Get the libcall routine name for the specified libcall implementation.
Primary interface to the complete machine description for the target machine.
const MCAsmInfo & getMCAsmInfo() const
Return target specific asm information.
TargetOptions Options
LLVM_ABI bool FramePointerIsReserved(const MachineFunction &MF) const
FramePointerIsReserved - This returns true if the frame pointer must always either point to a new fra...
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
LLVM Value Representation.
Definition Value.h:75
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
@ D16
Only 16 D registers.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:558
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Define
Register definition.
constexpr RegState getKillRegState(bool B)
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr RegState getDefRegState(bool B)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
DWARFExpression::Operation Op
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materialize a des...
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77