LLVM 20.0.0git
ARMFrameLowering.cpp
Go to the documentation of this file.
1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
13// This file contains the ARM implementation of TargetFrameLowering class.
14//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function so that the particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
25// | | Higher address
26// |-----------------------------------|
27// | |
28// | arguments passed on the stack |
29// | |
30// |-----------------------------------| <- sp
31// | | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) doesn't get created until in the
36// main function body, after the prologue is run. However, it's depicted here
37// for completeness.
38//
39// | | Higher address
40// |-----------------------------------|
41// | |
42// | arguments passed on the stack |
43// | |
44// |-----------------------------------| <- (sp at function entry)
45// | |
46// | varargs from registers |
47// | |
48// |-----------------------------------|
49// | |
50// | prev_lr |
51// | prev_fp |
52// | (a.k.a. "frame record") |
53// | |
54// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55// | |
56// | callee-saved gpr registers |
57// | |
58// |-----------------------------------|
59// | |
60// | callee-saved fp/simd regs |
61// | |
62// |-----------------------------------|
63// |.empty.space.to.make.part.below....|
64// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65// |.the.standard.8-byte.alignment.....| compile time; if present)
66// |-----------------------------------|
67// | |
68// | local variables of fixed size |
69// | including spill slots |
70// |-----------------------------------| <- base pointer (not defined by ABI,
71// |.variable-sized.local.variables....| LLVM chooses r6)
72// |.(VLAs)............................| (size of this area is unknown at
73// |...................................| compile time)
74// |-----------------------------------| <- sp
75// | | Lower address
76//
77//
78// To access the data in a frame, at-compile time, a constant offset must be
79// computable from one of the pointers (fp, bp, sp) to access it. The size
80// of the areas with a dotted background cannot be computed at compile-time
81// if they are present, making it required to have all three of fp, bp and
82// sp to be set up to be able to access all contents in the frame areas,
83// assuming all of the frame areas are non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
115#include "ARMSubtarget.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCContext.h"
144#include "llvm/MC/MCDwarf.h"
145#include "llvm/MC/MCInstrDesc.h"
147#include "llvm/Support/CodeGen.h"
150#include "llvm/Support/Debug.h"
155#include <algorithm>
156#include <cassert>
157#include <cstddef>
158#include <cstdint>
159#include <iterator>
160#include <utility>
161#include <vector>
162
163#define DEBUG_TYPE "arm-frame-lowering"
164
165using namespace llvm;
166
// Hidden command-line flag, enabled by default: align ARM NEON (D-register)
// spills in the prologue/epilogue, per its cl::desc below.
167static cl::opt<bool>
168SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
169 cl::desc("Align ARM NEON spills in prolog and epilog"));
170
173 unsigned NumAlignedDPRCS2Regs);
174
175enum class SpillArea {
176 GPRCS1, // First general-purpose callee-save push area.
177 GPRCS2, // Second GPR push area (used by the SplitR7 and SplitR11AAPCSSignRA variations).
178 DPRCS1, // D-register (VFP/NEON) callee-save area.
179 DPRCS2, // Aligned D-register area; placed below the other areas after SP re-alignment.
180 GPRCS3, // Third GPR push area ({r11, lr} for SplitR11WindowsSEH).
181 FPCXT, // FPCXTNS save area; always at the top of the stack frame (CMSE secure entry).
182};
183
184/// Get the spill area that Reg should be saved into in the prologue.
187 unsigned NumAlignedDPRCS2Regs,
189 // NoSplit:
190 // push {r0-r12, lr} GPRCS1
191 // vpush {d8-d15} DPRCS1
192 //
193 // SplitR7:
194 // push {r0-r7, lr} GPRCS1
195 // push {r8-r12} GPRCS2
196 // vpush {d8-d15} DPRCS1
197 //
198 // SplitR11WindowsSEH:
199 // push {r0-r10, r12} GPRCS1
200 // vpush {d8-d15} DPRCS1
201 // push {r11, lr} GPRCS3
202 //
203 // SplitR11AAPCSSignRA:
204 // push {r0-r10, r12} GPRCS1
205 // push {r11, lr} GPRCS2
206 // vpush {d8-d15} DPRCS1
207
208 // If FPCXTNS is spilled (for CMSE secure entry functions), it is always at
209 // the top of the stack frame.
210 // The DPRCS2 region is used for ABIs which only guarantee 4-byte alignment
211 // of SP. If used, it will be below the other save areas, after the stack has
212 // been re-aligned.
213
214 switch (Reg) {
215 default:
216 dbgs() << "Don't know where to spill " << printReg(Reg, RegInfo) << "\n";
217 llvm_unreachable("Don't know where to spill this register");
218 break;
219
220 case ARM::FPCXTNS:
221 return SpillArea::FPCXT;
222
223 case ARM::R0:
224 case ARM::R1:
225 case ARM::R2:
226 case ARM::R3:
227 case ARM::R4:
228 case ARM::R5:
229 case ARM::R6:
230 case ARM::R7:
231 return SpillArea::GPRCS1;
232
233 case ARM::R8:
234 case ARM::R9:
235 case ARM::R10:
236 if (Variation == ARMSubtarget::SplitR7)
237 return SpillArea::GPRCS2;
238 else
239 return SpillArea::GPRCS1;
240
241 case ARM::R11:
242 if (Variation == ARMSubtarget::SplitR7 ||
244 return SpillArea::GPRCS2;
245 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
246 return SpillArea::GPRCS3;
247
248 return SpillArea::GPRCS1;
249
250 case ARM::R12:
251 if (Variation == ARMSubtarget::SplitR7)
252 return SpillArea::GPRCS2;
253 else
254 return SpillArea::GPRCS1;
255
256 case ARM::LR:
257 if (Variation == ARMSubtarget::SplitR11AAPCSSignRA)
258 return SpillArea::GPRCS2;
259 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
260 return SpillArea::GPRCS3;
261
262 return SpillArea::GPRCS1;
263
264 case ARM::D0:
265 case ARM::D1:
266 case ARM::D2:
267 case ARM::D3:
268 case ARM::D4:
269 case ARM::D5:
270 case ARM::D6:
271 case ARM::D7:
272 return SpillArea::DPRCS1;
273
274 case ARM::D8:
275 case ARM::D9:
276 case ARM::D10:
277 case ARM::D11:
278 case ARM::D12:
279 case ARM::D13:
280 case ARM::D14:
281 case ARM::D15:
282 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
283 return SpillArea::DPRCS2;
284 else
285 return SpillArea::DPRCS1;
286
287 case ARM::D16:
288 case ARM::D17:
289 case ARM::D18:
290 case ARM::D19:
291 case ARM::D20:
292 case ARM::D21:
293 case ARM::D22:
294 case ARM::D23:
295 case ARM::D24:
296 case ARM::D25:
297 case ARM::D26:
298 case ARM::D27:
299 case ARM::D28:
300 case ARM::D29:
301 case ARM::D30:
302 case ARM::D31:
303 return SpillArea::DPRCS1;
304 }
305}
306
308 : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
309 STI(sti) {}
310
312 // iOS always has a FP for backtracking, force other targets to keep their FP
313 // when doing FastISel. The emitted code is currently superior, and in cases
314 // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
315 return MF.getSubtarget<ARMSubtarget>().useFastISel();
316}
317
318/// Returns true if the target can safely skip saving callee-saved registers
319/// for noreturn nounwind functions.
321 assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
322 MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
323 !MF.getFunction().hasFnAttribute(Attribute::UWTable));
324
325 // Frame pointer and link register are not treated as normal CSR, thus we
326 // can always skip CSR saves for nonreturning functions.
327 return true;
328}
329
330/// hasFPImpl - Return true if the specified function should have a dedicated
331/// frame pointer register. This is true if the function has variable sized
332/// allocas or if frame pointer elimination is disabled.
334 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
335 const MachineFrameInfo &MFI = MF.getFrameInfo();
336
337 // Check to see if the target want to forcibly keep frame pointer.
338 if (keepFramePointer(MF))
339 return true;
340
341 // ABI-required frame pointer.
343 return true;
344
345 // Frame pointer required for use within this function.
346 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
347 MFI.isFrameAddressTaken());
348}
349
350/// isFPReserved - Return true if the frame pointer register should be
351/// considered a reserved register on the scope of the specified function.
353 return hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF);
354}
355
356/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
357/// not required, we reserve argument space for call sites in the function
358/// immediately on entry to the current function. This eliminates the need for
359/// add/sub sp brackets around call sites. Returns true if the call frame is
360/// included as part of the stack frame.
362 const MachineFrameInfo &MFI = MF.getFrameInfo();
363 unsigned CFSize = MFI.getMaxCallFrameSize();
364 // It's not always a good idea to include the call frame as part of the
365 // stack frame. ARM (especially Thumb) has small immediate offset to
366 // address the stack frame. So a large call frame can cause poor codegen
367 // and may even makes it impossible to scavenge a register.
368 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
369 return false;
370
371 return !MFI.hasVarSizedObjects();
372}
373
374/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
375/// call frame pseudos can be simplified. Unlike most targets, having a FP
376/// is not sufficient here since we still may reference some objects via SP
377/// even when FP is available in Thumb2 mode.
378bool
381}
382
383// Returns how much of the incoming argument stack area we should clean up in an
384// epilogue. For the C calling convention this will be 0, for guaranteed tail
385// call conventions it can be positive (a normal return or a tail call to a
386// function that uses less stack space for arguments) or negative (for a tail
387// call to a function that needs more stack space than us for arguments).
391 bool IsTailCallReturn = false;
392 if (MBB.end() != MBBI) {
393 unsigned RetOpcode = MBBI->getOpcode();
394 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
395 RetOpcode == ARM::TCRETURNri ||
396 RetOpcode == ARM::TCRETURNrinotr12;
397 }
399
400 int ArgumentPopSize = 0;
401 if (IsTailCallReturn) {
402 MachineOperand &StackAdjust = MBBI->getOperand(1);
403
404 // For a tail-call in a callee-pops-arguments environment, some or all of
405 // the stack may actually be in use for the call's arguments, this is
406 // calculated during LowerCall and consumed here...
407 ArgumentPopSize = StackAdjust.getImm();
408 } else {
409 // ... otherwise the amount to pop is *all* of the argument space,
410 // conveniently stored in the MachineFunctionInfo by
411 // LowerFormalArguments. This will, of course, be zero for the C calling
412 // convention.
413 ArgumentPopSize = AFI->getArgumentStackToRestore();
414 }
415
416 return ArgumentPopSize;
417}
418
419static bool needsWinCFI(const MachineFunction &MF) {
420 const Function &F = MF.getFunction();
421 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
422 F.needsUnwindTableEntry();
423}
424
425// Given a load or a store instruction, generate an appropriate unwinding SEH
426// code on Windows.
428 const TargetInstrInfo &TII,
429 unsigned Flags) {
430 unsigned Opc = MBBI->getOpcode();
432 MachineFunction &MF = *MBB->getParent();
433 DebugLoc DL = MBBI->getDebugLoc();
435 const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
436 const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
437
438 Flags |= MachineInstr::NoMerge;
439
440 switch (Opc) {
441 default:
442 report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
443 break;
444 case ARM::t2ADDri: // add.w r11, sp, #xx
445 case ARM::t2ADDri12: // add.w r11, sp, #xx
446 case ARM::t2MOVTi16: // movt r4, #xx
447 case ARM::tBL: // bl __chkstk
448 // These are harmless if used for just setting up a frame pointer,
449 // but that frame pointer can't be relied upon for unwinding, unless
450 // set up with SEH_SaveSP.
451 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
452 .addImm(/*Wide=*/1)
453 .setMIFlags(Flags);
454 break;
455
456 case ARM::t2MOVi16: { // mov(w) r4, #xx
457 bool Wide = MBBI->getOperand(1).getImm() >= 256;
458 if (!Wide) {
459 MachineInstrBuilder NewInstr =
460 BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
461 NewInstr.add(MBBI->getOperand(0));
462 NewInstr.add(t1CondCodeOp(/*isDead=*/true));
463 for (MachineOperand &MO : llvm::drop_begin(MBBI->operands()))
464 NewInstr.add(MO);
465 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
466 MBB->erase(MBBI);
467 MBBI = NewMBBI;
468 }
469 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
470 break;
471 }
472
473 case ARM::tBLXr: // blx r12 (__chkstk)
474 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
475 .addImm(/*Wide=*/0)
476 .setMIFlags(Flags);
477 break;
478
479 case ARM::t2MOVi32imm: // movw+movt
480 // This pseudo instruction expands into two mov instructions. If the
481 // second operand is a symbol reference, this will stay as two wide
482 // instructions, movw+movt. If they're immediates, the first one can
483 // end up as a narrow mov though.
484 // As two SEH instructions are appended here, they won't get interleaved
485 // between the two final movw/movt instructions, but it doesn't make any
486 // practical difference.
487 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
488 .addImm(/*Wide=*/1)
489 .setMIFlags(Flags);
490 MBB->insertAfter(MBBI, MIB);
491 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
492 .addImm(/*Wide=*/1)
493 .setMIFlags(Flags);
494 break;
495
496 case ARM::t2STR_PRE:
497 if (MBBI->getOperand(0).getReg() == ARM::SP &&
498 MBBI->getOperand(2).getReg() == ARM::SP &&
499 MBBI->getOperand(3).getImm() == -4) {
500 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
501 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
502 .addImm(1ULL << Reg)
503 .addImm(/*Wide=*/1)
504 .setMIFlags(Flags);
505 } else {
506 report_fatal_error("No matching SEH Opcode for t2STR_PRE");
507 }
508 break;
509
510 case ARM::t2LDR_POST:
511 if (MBBI->getOperand(1).getReg() == ARM::SP &&
512 MBBI->getOperand(2).getReg() == ARM::SP &&
513 MBBI->getOperand(3).getImm() == 4) {
514 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
515 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
516 .addImm(1ULL << Reg)
517 .addImm(/*Wide=*/1)
518 .setMIFlags(Flags);
519 } else {
520 report_fatal_error("No matching SEH Opcode for t2LDR_POST");
521 }
522 break;
523
524 case ARM::t2LDMIA_RET:
525 case ARM::t2LDMIA_UPD:
526 case ARM::t2STMDB_UPD: {
527 unsigned Mask = 0;
528 bool Wide = false;
529 for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
530 const MachineOperand &MO = MBBI->getOperand(i);
531 if (!MO.isReg() || MO.isImplicit())
532 continue;
533 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
534 if (Reg == 15)
535 Reg = 14;
536 if (Reg >= 8 && Reg <= 13)
537 Wide = true;
538 else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
539 Wide = true;
540 Mask |= 1 << Reg;
541 }
542 if (!Wide) {
543 unsigned NewOpc;
544 switch (Opc) {
545 case ARM::t2LDMIA_RET:
546 NewOpc = ARM::tPOP_RET;
547 break;
548 case ARM::t2LDMIA_UPD:
549 NewOpc = ARM::tPOP;
550 break;
551 case ARM::t2STMDB_UPD:
552 NewOpc = ARM::tPUSH;
553 break;
554 default:
556 }
557 MachineInstrBuilder NewInstr =
558 BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
559 for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
560 NewInstr.add(MBBI->getOperand(i));
561 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
562 MBB->erase(MBBI);
563 MBBI = NewMBBI;
564 }
565 unsigned SEHOpc =
566 (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
567 MIB = BuildMI(MF, DL, TII.get(SEHOpc))
568 .addImm(Mask)
569 .addImm(Wide ? 1 : 0)
570 .setMIFlags(Flags);
571 break;
572 }
573 case ARM::VSTMDDB_UPD:
574 case ARM::VLDMDIA_UPD: {
575 int First = -1, Last = 0;
576 for (const MachineOperand &MO : llvm::drop_begin(MBBI->operands(), 4)) {
577 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
578 if (First == -1)
579 First = Reg;
580 Last = Reg;
581 }
582 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
583 .addImm(First)
584 .addImm(Last)
585 .setMIFlags(Flags);
586 break;
587 }
588 case ARM::tSUBspi:
589 case ARM::tADDspi:
590 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
591 .addImm(MBBI->getOperand(2).getImm() * 4)
592 .addImm(/*Wide=*/0)
593 .setMIFlags(Flags);
594 break;
595 case ARM::t2SUBspImm:
596 case ARM::t2SUBspImm12:
597 case ARM::t2ADDspImm:
598 case ARM::t2ADDspImm12:
599 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
600 .addImm(MBBI->getOperand(2).getImm())
601 .addImm(/*Wide=*/1)
602 .setMIFlags(Flags);
603 break;
604
605 case ARM::tMOVr:
606 if (MBBI->getOperand(1).getReg() == ARM::SP &&
607 (Flags & MachineInstr::FrameSetup)) {
608 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
609 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
610 .addImm(Reg)
611 .setMIFlags(Flags);
612 } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
613 (Flags & MachineInstr::FrameDestroy)) {
614 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
615 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
616 .addImm(Reg)
617 .setMIFlags(Flags);
618 } else {
619 report_fatal_error("No SEH Opcode for MOV");
620 }
621 break;
622
623 case ARM::tBX_RET:
624 case ARM::TCRETURNri:
625 case ARM::TCRETURNrinotr12:
626 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
627 .addImm(/*Wide=*/0)
628 .setMIFlags(Flags);
629 break;
630
631 case ARM::TCRETURNdi:
632 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
633 .addImm(/*Wide=*/1)
634 .setMIFlags(Flags);
635 break;
636 }
637 return MBB->insertAfter(MBBI, MIB);
638}
639
642 if (MBBI == MBB.begin())
644 return std::prev(MBBI);
645}
646
650 const ARMBaseInstrInfo &TII, unsigned MIFlags) {
651 if (Start.isValid())
652 Start = std::next(Start);
653 else
654 Start = MBB.begin();
655
656 for (auto MI = Start; MI != End;) {
657 auto Next = std::next(MI);
658 // Check if this instruction already has got a SEH opcode added. In that
659 // case, don't do this generic mapping.
660 if (Next != End && isSEHInstruction(*Next)) {
661 MI = std::next(Next);
662 while (MI != End && isSEHInstruction(*MI))
663 ++MI;
664 continue;
665 }
666 insertSEH(MI, TII, MIFlags);
667 MI = Next;
668 }
669}
670
673 const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
674 unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
675 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
676 if (isARM)
677 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
678 Pred, PredReg, TII, MIFlags);
679 else
680 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
681 Pred, PredReg, TII, MIFlags);
682}
683
684static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
686 const ARMBaseInstrInfo &TII, int NumBytes,
687 unsigned MIFlags = MachineInstr::NoFlags,
689 unsigned PredReg = 0) {
690 emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
691 MIFlags, Pred, PredReg);
692}
693
695 int RegSize;
696 switch (MI.getOpcode()) {
697 case ARM::VSTMDDB_UPD:
698 RegSize = 8;
699 break;
700 case ARM::STMDB_UPD:
701 case ARM::t2STMDB_UPD:
702 RegSize = 4;
703 break;
704 case ARM::t2STR_PRE:
705 case ARM::STR_PRE_IMM:
706 return 4;
707 default:
708 llvm_unreachable("Unknown push or pop like instruction");
709 }
710
711 int count = 0;
712 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
713 // pred) so the list starts at 4.
714 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
715 count += RegSize;
716 return count;
717}
718
720 size_t StackSizeInBytes) {
721 const MachineFrameInfo &MFI = MF.getFrameInfo();
722 const Function &F = MF.getFunction();
723 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
724
725 StackProbeSize =
726 F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
727 return (StackSizeInBytes >= StackProbeSize) &&
728 !F.hasFnAttribute("no-stack-arg-probe");
729}
730
731namespace {
732
733struct StackAdjustingInsts {
734 struct InstInfo {
736 unsigned SPAdjust;
737 bool BeforeFPSet;
738
739#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
740 void dump() {
741 dbgs() << " " << (BeforeFPSet ? "before-fp " : " ")
742 << "sp-adjust=" << SPAdjust;
743 I->dump();
744 }
745#endif
746 };
747
749
750 void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
751 bool BeforeFPSet = false) {
752 InstInfo Info = {I, SPAdjust, BeforeFPSet};
753 Insts.push_back(Info);
754 }
755
756 void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
757 auto Info =
758 llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
759 assert(Info != Insts.end() && "invalid sp adjusting instruction");
760 Info->SPAdjust += ExtraBytes;
761 }
762
763 void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
764 const ARMBaseInstrInfo &TII, bool HasFP) {
766 unsigned CFAOffset = 0;
767 for (auto &Info : Insts) {
768 if (HasFP && !Info.BeforeFPSet)
769 return;
770
771 CFAOffset += Info.SPAdjust;
772 unsigned CFIIndex = MF.addFrameInst(
773 MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
774 BuildMI(MBB, std::next(Info.I), dl,
775 TII.get(TargetOpcode::CFI_INSTRUCTION))
776 .addCFIIndex(CFIIndex)
778 }
779 }
780
781#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
782 void dump() {
783 dbgs() << "StackAdjustingInsts:\n";
784 for (auto &Info : Insts)
785 Info.dump();
786 }
787#endif
788};
789
790} // end anonymous namespace
791
792/// Emit an instruction sequence that will align the address in
793/// register Reg by zero-ing out the lower bits. For versions of the
794/// architecture that support Neon, this must be done in a single
795/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
796/// single instruction. That function only gets called when optimizing
797/// spilling of D registers on a core with the Neon instruction set
798/// present.
800 const TargetInstrInfo &TII,
803 const DebugLoc &DL, const unsigned Reg,
804 const Align Alignment,
805 const bool MustBeSingleInstruction) {
806 const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
807 const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
808 const unsigned AlignMask = Alignment.value() - 1U;
809 const unsigned NrBitsToZero = Log2(Alignment);
810 assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
811 if (!AFI->isThumbFunction()) {
812 // if the BFC instruction is available, use that to zero the lower
813 // bits:
814 // bfc Reg, #0, log2(Alignment)
815 // otherwise use BIC, if the mask to zero the required number of bits
816 // can be encoded in the bic immediate field
817 // bic Reg, Reg, Alignment-1
818 // otherwise, emit
819 // lsr Reg, Reg, log2(Alignment)
820 // lsl Reg, Reg, log2(Alignment)
821 if (CanUseBFC) {
822 BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
824 .addImm(~AlignMask)
826 } else if (AlignMask <= 255) {
827 BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
829 .addImm(AlignMask)
831 .add(condCodeOp());
832 } else {
833 assert(!MustBeSingleInstruction &&
834 "Shouldn't call emitAligningInstructions demanding a single "
835 "instruction to be emitted for large stack alignment for a target "
836 "without BFC.");
837 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
839 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
841 .add(condCodeOp());
842 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
844 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
846 .add(condCodeOp());
847 }
848 } else {
849 // Since this is only reached for Thumb-2 targets, the BFC instruction
850 // should always be available.
851 assert(CanUseBFC);
852 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
854 .addImm(~AlignMask)
856 }
857}
858
859/// We need the offset of the frame pointer relative to other MachineFrameInfo
860/// offsets which are encoded relative to SP at function begin.
861/// See also emitPrologue() for how the FP is set up.
862/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
863/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
864/// this to produce a conservative estimate that we check in an assert() later.
865static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
866 const MachineFunction &MF) {
869 // For Thumb1, push.w isn't available, so the first push will always push
870 // r7 and lr onto the stack first.
871 if (AFI.isThumb1OnlyFunction())
872 return -AFI.getArgRegsSaveSize() - (2 * 4);
873 // This is a conservative estimation: Assume the frame pointer being r7 and
874 // pc("r15") up to r8 getting spilled before (= 8 registers).
875 int MaxRegBytes = 8 * 4;
876 if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
877 // Here, r11 can be stored below all of r4-r15.
878 MaxRegBytes = 11 * 4;
879 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
880 // Here, r11 can be stored below all of r4-r15 plus d8-d15.
881 MaxRegBytes = 11 * 4 + 8 * 8;
882 }
883 int FPCXTSaveSize =
884 (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
885 return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
886}
887
889 MachineBasicBlock &MBB) const {
891 MachineFrameInfo &MFI = MF.getFrameInfo();
893 MCContext &Context = MF.getContext();
894 const TargetMachine &TM = MF.getTarget();
895 const MCRegisterInfo *MRI = Context.getRegisterInfo();
896 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
899 "This emitPrologue does not support Thumb1!");
900 bool isARM = !AFI->isThumbFunction();
901 Align Alignment = STI.getFrameLowering()->getStackAlign();
902 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
903 unsigned NumBytes = MFI.getStackSize();
904 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
905 int FPCXTSaveSize = 0;
906 bool NeedsWinCFI = needsWinCFI(MF);
909
910 LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF.getName() << "\n");
911
912 // Debug location must be unknown since the first debug location is used
913 // to determine the end of the prologue.
914 DebugLoc dl;
915
916 Register FramePtr = RegInfo->getFrameRegister(MF);
917
918 // Determine the sizes of each callee-save spill areas and record which frame
919 // belongs to which callee-save spill areas.
920 unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCS1Size = 0, GPRCS3Size = 0,
921 DPRCS2Size = 0;
922 int FramePtrSpillFI = 0;
923 int D8SpillFI = 0;
924
925 // All calls are tail calls in GHC calling conv, and functions have no
926 // prologue/epilogue.
928 return;
929
930 StackAdjustingInsts DefCFAOffsetCandidates;
931 bool HasFP = hasFP(MF);
932
933 if (!AFI->hasStackFrame() &&
934 (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
935 if (NumBytes != 0) {
936 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
938 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
939 }
940 if (!NeedsWinCFI)
941 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
942 if (NeedsWinCFI && MBBI != MBB.begin()) {
944 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
946 MF.setHasWinCFI(true);
947 }
948 return;
949 }
950
951 // Determine spill area sizes, and some important frame indices.
952 SpillArea FramePtrSpillArea = SpillArea::GPRCS1;
953 bool BeforeFPPush = true;
954 for (const CalleeSavedInfo &I : CSI) {
955 Register Reg = I.getReg();
956 int FI = I.getFrameIdx();
957
958 SpillArea Area = getSpillArea(Reg, PushPopSplit,
959 AFI->getNumAlignedDPRCS2Regs(), RegInfo);
960
961 if (Reg == FramePtr) {
962 FramePtrSpillFI = FI;
963 FramePtrSpillArea = Area;
964 }
965 if (Reg == ARM::D8)
966 D8SpillFI = FI;
967
968 switch (Area) {
969 case SpillArea::FPCXT:
970 FPCXTSaveSize += 4;
971 break;
972 case SpillArea::GPRCS1:
973 GPRCS1Size += 4;
974 break;
975 case SpillArea::GPRCS2:
976 GPRCS2Size += 4;
977 break;
978 case SpillArea::DPRCS1:
979 DPRCS1Size += 8;
980 break;
981 case SpillArea::GPRCS3:
982 GPRCS3Size += 4;
983 break;
984 case SpillArea::DPRCS2:
985 DPRCS2Size += 4;
986 break;
987 }
988 }
989
990 MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push,
991 DPRCS1Push, GPRCS3Push;
992
993 // Move past the PAC computation.
994 if (AFI->shouldSignReturnAddress())
995 LastPush = MBBI++;
996
997 // Move past FPCXT area.
998 if (FPCXTSaveSize > 0) {
999 LastPush = MBBI++;
1000 DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush);
1001 }
1002
1003 // Allocate the vararg register save area.
1004 if (ArgRegsSaveSize) {
1005 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
1007 LastPush = std::prev(MBBI);
1008 DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush);
1009 }
1010
1011 // Move past area 1.
1012 if (GPRCS1Size > 0) {
1013 GPRCS1Push = LastPush = MBBI++;
1014 DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush);
1015 if (FramePtrSpillArea == SpillArea::GPRCS1)
1016 BeforeFPPush = false;
1017 }
1018
1019 // Determine starting offsets of spill areas. These offsets are all positive
1020 // offsets from the bottom of the lowest-addressed callee-save area
1021 // (excluding DPRCS2, which is the re-aligned stack region) to the bottom
1022 // of the spill area in question.
1023 unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
1024 unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
1025 unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
1026
1027 Align DPRAlign = DPRCS1Size ? std::min(Align(8), Alignment) : Align(4);
1028 unsigned DPRGapSize =
1029 (ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size + GPRCS2Size) %
1030 DPRAlign.value();
1031
1032 unsigned DPRCS1Offset = GPRCS2Offset - DPRGapSize - DPRCS1Size;
1033
1034 if (HasFP) {
1035 // Offset from the CFA to the saved frame pointer, will be negative.
1036 [[maybe_unused]] int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
1037 LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
1038 << ", FPOffset: " << FPOffset << "\n");
1039 assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
1040 "Max FP estimation is wrong");
1041 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
1042 NumBytes);
1043 }
1044 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
1045 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
1046 AFI->setDPRCalleeSavedArea1Offset(DPRCS1Offset);
1047
1048 // Move past area 2.
1049 if (GPRCS2Size > 0) {
1051 GPRCS2Push = LastPush = MBBI++;
1052 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
1053 if (FramePtrSpillArea == SpillArea::GPRCS2)
1054 BeforeFPPush = false;
1055 }
1056
1057 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
1058 // .cfi_offset operations will reflect that.
1059 if (DPRGapSize) {
1060 assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
1061 if (LastPush != MBB.end() &&
1062 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
1063 DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
1064 else {
1065 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
1067 DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize, BeforeFPPush);
1068 }
1069 }
1070
1071 // Move past DPRCS1Size.
1072 if (DPRCS1Size > 0) {
1073 // Since vpush register list cannot have gaps, there may be multiple vpush
1074 // instructions in the prologue.
1075 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
1076 DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI),
1077 BeforeFPPush);
1078 DPRCS1Push = LastPush = MBBI++;
1079 }
1080 }
1081
1082 // Move past the aligned DPRCS2 area.
1083 if (DPRCS2Size > 0) {
1085 // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
1086 // leaves the stack pointer pointing to the DPRCS2 area.
1087 //
1088 // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
1089 NumBytes += MFI.getObjectOffset(D8SpillFI);
1090 } else
1091 NumBytes = DPRCS1Offset;
1092
1093 // Move GPRCS3, if using using SplitR11WindowsSEH.
1094 if (GPRCS3Size > 0) {
1096 GPRCS3Push = LastPush = MBBI++;
1097 DefCFAOffsetCandidates.addInst(LastPush, GPRCS3Size, BeforeFPPush);
1098 if (FramePtrSpillArea == SpillArea::GPRCS3)
1099 BeforeFPPush = false;
1100 }
1101
1102 bool NeedsWinCFIStackAlloc = NeedsWinCFI;
1103 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH && HasFP)
1104 NeedsWinCFIStackAlloc = false;
1105
1106 if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
1107 uint32_t NumWords = NumBytes >> 2;
1108
1109 if (NumWords < 65536) {
1110 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
1111 .addImm(NumWords)
1114 } else {
1115 // Split into two instructions here, instead of using t2MOVi32imm,
1116 // to allow inserting accurate SEH instructions (including accurate
1117 // instruction size for each of them).
1118 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
1119 .addImm(NumWords & 0xffff)
1122 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
1123 .addReg(ARM::R4)
1124 .addImm(NumWords >> 16)
1127 }
1128
1129 switch (TM.getCodeModel()) {
1130 case CodeModel::Tiny:
1131 llvm_unreachable("Tiny code model not available on ARM.");
1132 case CodeModel::Small:
1133 case CodeModel::Medium:
1134 case CodeModel::Kernel:
1135 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
1137 .addExternalSymbol("__chkstk")
1138 .addReg(ARM::R4, RegState::Implicit)
1140 break;
1141 case CodeModel::Large:
1142 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
1143 .addExternalSymbol("__chkstk")
1145
1146 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
1148 .addReg(ARM::R12, RegState::Kill)
1149 .addReg(ARM::R4, RegState::Implicit)
1151 break;
1152 }
1153
1154 MachineInstrBuilder Instr, SEH;
1155 Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
1156 .addReg(ARM::SP, RegState::Kill)
1157 .addReg(ARM::R4, RegState::Kill)
1160 .add(condCodeOp());
1161 if (NeedsWinCFIStackAlloc) {
1162 SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
1163 .addImm(NumBytes)
1164 .addImm(/*Wide=*/1)
1166 MBB.insertAfter(Instr, SEH);
1167 }
1168 NumBytes = 0;
1169 }
1170
1171 if (NumBytes) {
1172 // Adjust SP after all the callee-save spills.
1173 if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
1174 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
1175 DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
1176 else {
1177 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
1179 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
1180 }
1181
1182 if (HasFP && isARM)
1183 // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
1184 // Note it's not safe to do this in Thumb2 mode because it would have
1185 // taken two instructions:
1186 // mov sp, r7
1187 // sub sp, #24
1188 // If an interrupt is taken between the two instructions, then sp is in
1189 // an inconsistent state (pointing to the middle of callee-saved area).
1190 // The interrupt handler can end up clobbering the registers.
1191 AFI->setShouldRestoreSPFromFP(true);
1192 }
1193
1194 // Set FP to point to the stack slot that contains the previous FP.
1195 // For iOS, FP is R7, which has now been stored in spill area 1.
1196 // Otherwise, if this is not iOS, all the callee-saved registers go
1197 // into spill area 1, including the FP in R11. In either case, it
1198 // is in area one and the adjustment needs to take place just after
1199 // that push.
1201 if (HasFP) {
1202 MachineBasicBlock::iterator FPPushInst;
1203 // Offset from SP immediately after the push which saved the FP to the FP
1204 // save slot.
1205 int64_t FPOffsetAfterPush;
1206 switch (FramePtrSpillArea) {
1207 case SpillArea::GPRCS1:
1208 FPPushInst = GPRCS1Push;
1209 FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1210 ArgRegsSaveSize + FPCXTSaveSize +
1211 sizeOfSPAdjustment(*FPPushInst);
1212 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
1213 << FPOffsetAfterPush << " after that push\n");
1214 break;
1215 case SpillArea::GPRCS2:
1216 FPPushInst = GPRCS2Push;
1217 FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1218 ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1219 sizeOfSPAdjustment(*FPPushInst);
1220 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
1221 << FPOffsetAfterPush << " after that push\n");
1222 break;
1223 case SpillArea::GPRCS3:
1224 FPPushInst = GPRCS3Push;
1225 FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1226 ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1227 GPRCS2Size + DPRCS1Size + DPRGapSize +
1228 sizeOfSPAdjustment(*FPPushInst);
1229 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS3, offset "
1230 << FPOffsetAfterPush << " after that push\n");
1231 break;
1232 default:
1233 llvm_unreachable("frame pointer in unknown spill area");
1234 break;
1235 }
1236 AfterPush = std::next(FPPushInst);
1237 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1238 assert(FPOffsetAfterPush == 0);
1239
1240 // Emit the MOV or ADD to set up the frame pointer register.
1241 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1242 FramePtr, ARM::SP, FPOffsetAfterPush,
1244
1245 if (!NeedsWinCFI) {
1246 // Emit DWARF info to find the CFA using the frame pointer from this
1247 // point onward.
1248 if (FPOffsetAfterPush != 0) {
1249 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
1250 nullptr, MRI->getDwarfRegNum(FramePtr, true),
1251 -MFI.getObjectOffset(FramePtrSpillFI)));
1252 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1253 .addCFIIndex(CFIIndex)
1255 } else {
1256 unsigned CFIIndex =
1258 nullptr, MRI->getDwarfRegNum(FramePtr, true)));
1259 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1260 .addCFIIndex(CFIIndex)
1262 }
1263 }
1264 }
1265
1266 // Emit a SEH opcode indicating the prologue end. The rest of the prologue
1267 // instructions below don't need to be replayed to unwind the stack.
1268 if (NeedsWinCFI && MBBI != MBB.begin()) {
1270 if (HasFP && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1271 End = AfterPush;
1273 BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
1275 MF.setHasWinCFI(true);
1276 }
1277
1278 // Now that the prologue's actual instructions are finalised, we can insert
1279 // the necessary DWARF cf instructions to describe the situation. Start by
1280 // recording where each register ended up:
1281 if (!NeedsWinCFI) {
1282 for (const auto &Entry : reverse(CSI)) {
1283 Register Reg = Entry.getReg();
1284 int FI = Entry.getFrameIdx();
1286 switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(),
1287 RegInfo)) {
1288 case SpillArea::GPRCS1:
1289 CFIPos = std::next(GPRCS1Push);
1290 break;
1291 case SpillArea::GPRCS2:
1292 CFIPos = std::next(GPRCS2Push);
1293 break;
1294 case SpillArea::DPRCS1:
1295 CFIPos = std::next(DPRCS1Push);
1296 break;
1297 case SpillArea::GPRCS3:
1298 CFIPos = std::next(GPRCS3Push);
1299 break;
1300 case SpillArea::FPCXT:
1301 case SpillArea::DPRCS2:
1302 // FPCXT and DPRCS2 are not represented in the DWARF info.
1303 break;
1304 }
1305
1306 if (CFIPos.isValid()) {
1308 nullptr,
1309 MRI->getDwarfRegNum(Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg,
1310 true),
1311 MFI.getObjectOffset(FI)));
1312 BuildMI(MBB, CFIPos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1313 .addCFIIndex(CFIIndex)
1315 }
1316 }
1317 }
1318
1319 // Now we can emit descriptions of where the canonical frame address was
1320 // throughout the process. If we have a frame pointer, it takes over the job
1321 // half-way through, so only the first few .cfi_def_cfa_offset instructions
1322 // actually get emitted.
1323 if (!NeedsWinCFI) {
1324 LLVM_DEBUG(DefCFAOffsetCandidates.dump());
1325 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
1326 }
1327
1328 if (STI.isTargetELF() && hasFP(MF))
1330 AFI->getFramePtrSpillOffset());
1331
1332 AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
1333 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
1334 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
1335 AFI->setDPRCalleeSavedGapSize(DPRGapSize);
1336 AFI->setDPRCalleeSavedArea1Size(DPRCS1Size);
1337 AFI->setGPRCalleeSavedArea3Size(GPRCS3Size);
1338
1339 // If we need dynamic stack realignment, do it here. Be paranoid and make
1340 // sure if we also have VLAs, we have a base pointer for frame access.
1341 // If aligned NEON registers were spilled, the stack has already been
1342 // realigned.
1343 if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
1344 Align MaxAlign = MFI.getMaxAlign();
1346 if (!AFI->isThumbFunction()) {
1347 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
1348 false);
1349 } else {
1350 // We cannot use sp as source/dest register here, thus we're using r4 to
1351 // perform the calculations. We're emitting the following sequence:
1352 // mov r4, sp
1353 // -- use emitAligningInstructions to produce best sequence to zero
1354 // -- out lower bits in r4
1355 // mov sp, r4
1356 // FIXME: It will be better just to find spare register here.
1357 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
1358 .addReg(ARM::SP, RegState::Kill)
1360 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
1361 false);
1362 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1363 .addReg(ARM::R4, RegState::Kill)
1365 }
1366
1367 AFI->setShouldRestoreSPFromFP(true);
1368 }
1369
1370 // If we need a base pointer, set it up here. It's whatever the value
1371 // of the stack pointer is at this point. Any variable size objects
1372 // will be allocated after this, so we can still use the base pointer
1373 // to reference locals.
1374 // FIXME: Clarify FrameSetup flags here.
1375 if (RegInfo->hasBasePointer(MF)) {
1376 if (isARM)
1377 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
1378 .addReg(ARM::SP)
1380 .add(condCodeOp());
1381 else
1382 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
1383 .addReg(ARM::SP)
1385 }
1386
1387 // If the frame has variable sized objects then the epilogue must restore
1388 // the sp from fp. We can assume there's an FP here since hasFP already
1389 // checks for hasVarSizedObjects.
1390 if (MFI.hasVarSizedObjects())
1391 AFI->setShouldRestoreSPFromFP(true);
1392}
1393
// ARMFrameLowering::emitEpilogue (continuation — the first line of the
// signature, file line 1394, is not visible in this rendering).
// Tears down the frame built by emitPrologue: restores SP (from FP when
// required), walks MBBI past the callee-save restore instructions, re-adds
// the reserved argument-save area, and emits PAC validation (t2AUT) and
// Windows SEH epilogue markers where needed.
// NOTE(review): interior line numbers jump (e.g. 1396->1398, 1403->1406,
// 1431->1433), so several continuation lines — predOps(...) operands,
// MachineInstr::FrameDestroy flags, and some local declarations (AFI, STI,
// MBBI) — are missing from this view; verify against the upstream source
// before editing.
1395 MachineBasicBlock &MBB) const {
1396 MachineFrameInfo &MFI = MF.getFrameInfo();
1398 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
1399 const ARMBaseInstrInfo &TII =
1400 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1401 assert(!AFI->isThumb1OnlyFunction() &&
1402 "This emitEpilogue does not support Thumb1!");
1403 bool isARM = !AFI->isThumbFunction();
1406
1407 LLVM_DEBUG(dbgs() << "Emitting epilogue for " << MF.getName() << "\n");
1408
1409 // Amount of stack space we reserved next to incoming args for either
1410 // varargs registers or stack arguments in tail calls made by this function.
1411 unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
1412
1413 // How much of the stack used by incoming arguments this function is expected
1414 // to restore in this particular epilogue.
1415 int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
1416 int NumBytes = (int)MFI.getStackSize();
1417 Register FramePtr = RegInfo->getFrameRegister(MF);
1418
1419 // All calls are tail calls in GHC calling conv, and functions have no
1420 // prologue/epilogue.
// NOTE(review): the guard for this early return (the calling-convention
// check on file line 1421) is not visible in this rendering.
1422 return;
1423
1424 // First put ourselves on the first (from top) terminator instructions.
1426 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
1427
1428 MachineBasicBlock::iterator RangeStart;
// Fast path: no stack frame was created, so at most a single SP adjustment
// undoes everything (reserved area plus incoming-arg restore).
1429 if (!AFI->hasStackFrame()) {
1430 if (MF.hasWinCFI()) {
1431 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1433 RangeStart = initMBBRange(MBB, MBBI);
1434 }
1435
1436 if (NumBytes + IncomingArgStackToRestore != 0)
1437 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1438 NumBytes + IncomingArgStackToRestore,
1440 } else {
1441 // Unwind MBBI to point to first LDR / VLDRD.
1442 if (MBBI != MBB.begin()) {
1443 do {
1444 --MBBI;
1445 } while (MBBI != MBB.begin() &&
1447 if (!MBBI->getFlag(MachineInstr::FrameDestroy))
1448 ++MBBI;
1449 }
1450
1451 if (MF.hasWinCFI()) {
1452 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1454 RangeStart = initMBBRange(MBB, MBBI);
1455 }
1456
1457 // Move SP to start of FP callee save spill area.
// NOTE(review): the continuation lines of this subtraction (the remaining
// callee-save area sizes, file lines 1460-1462) are not visible here.
1458 NumBytes -=
1459 (ReservedArgStack + AFI->getFPCXTSaveAreaSize() +
1463
1464 // Reset SP based on frame pointer only if the stack frame extends beyond
1465 // frame pointer stack slot or target is ELF and the function has FP.
1466 if (AFI->shouldRestoreSPFromFP()) {
1467 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
1468 if (NumBytes) {
1469 if (isARM)
1470 emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
1471 ARMCC::AL, 0, TII,
1473 else {
1474 // It's not possible to restore SP from FP in a single instruction.
1475 // For iOS, this looks like:
1476 // mov sp, r7
1477 // sub sp, #24
1478 // This is bad, if an interrupt is taken after the mov, sp is in an
1479 // inconsistent state.
1480 // Use the first callee-saved register as a scratch register.
1481 assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
1482 "No scratch register to restore SP from FP!");
1483 emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
1485 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1486 .addReg(ARM::R4)
1489 }
1490 } else {
1491 // Thumb2 or ARM.
1492 if (isARM)
1493 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
1496 .add(condCodeOp())
1498 else
1499 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1503 }
// No FP restore needed: fold the SP bump into an existing push/pop when
// possible, otherwise emit an explicit SP update.
1504 } else if (NumBytes &&
1505 !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
1506 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
1508
1509 // Increment past our save areas.
1510 if (AFI->getGPRCalleeSavedArea3Size()) {
1512 (void)PushPopSplit;
1513 MBBI++;
1514 }
1515
1516 if (MBBI != MBB.end() && AFI->getDPRCalleeSavedArea1Size()) {
1517 MBBI++;
1518 // Since vpop register list cannot have gaps, there may be multiple vpop
1519 // instructions in the epilogue.
1520 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
1521 MBBI++;
1522 }
1523 if (AFI->getDPRCalleeSavedGapSize()) {
1524 assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
1525 "unexpected DPR alignment gap");
1526 emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
1528 }
1529
1530 if (AFI->getGPRCalleeSavedArea2Size()) {
1532 (void)PushPopSplit;
1533 MBBI++;
1534 }
1535 if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
1536
1537 if (ReservedArgStack || IncomingArgStackToRestore) {
1538 assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
1539 "attempting to restore negative stack amount");
1540 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1541 ReservedArgStack + IncomingArgStackToRestore,
1543 }
1544
1545 // Validate PAC, It should have been already popped into R12. For CMSE entry
1546 // function, the validation instruction is emitted during expansion of the
1547 // tBXNS_RET, since the validation must use the value of SP at function
1548 // entry, before saving, resp. after restoring, FPCXTNS.
1549 if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
1550 BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
1551 }
1552
1553 if (MF.hasWinCFI()) {
1555 BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
1557 }
1558}
1559
1560/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1561/// debug info. It's the same as what we use for resolving the code-gen
1562/// references for now. FIXME: This can go wrong when references are
1563/// SP-relative and simple call frames aren't used.
// NOTE(review): the first line of the signature (file line 1564) is not
// visible in this rendering.
1565 int FI,
1566 Register &FrameReg) const {
// Delegate to ResolveFrameIndexReference with SPAdj == 0 (no outstanding
// call-frame SP adjustment at the debug-info reference point).
1567 return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
1568}
1569
// ARMFrameLowering::ResolveFrameIndexReference (continuation — the first
// line of the signature, file line 1570, is not visible in this rendering).
// Picks the best base register (SP, FP, or the base pointer) for accessing
// frame index FI and returns the immediate offset from that register,
// accounting for any outstanding SP adjustment (SPAdj). Sets FrameReg to
// the register chosen.
1571 int FI, Register &FrameReg,
1572 int SPAdj) const {
1573 const MachineFrameInfo &MFI = MF.getFrameInfo();
1574 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1576 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// SP-relative offset: object offsets are relative to the incoming SP, so
// adding the total frame size rebases them onto the post-prologue SP.
1577 int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
// Same slot, expressed relative to the saved-FP position instead of SP.
1578 int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1579 bool isFixed = MFI.isFixedObjectIndex(FI);
1580
1581 FrameReg = ARM::SP;
1582 Offset += SPAdj;
1583
1584 // SP can move around if there are allocas. We may also lose track of SP
1585 // when emergency spilling inside a non-reserved call frame setup.
1586 bool hasMovingSP = !hasReservedCallFrame(MF);
1587
1588 // When dynamically realigning the stack, use the frame pointer for
1589 // parameters, and the stack/base pointer for locals.
1590 if (RegInfo->hasStackRealignment(MF)) {
1591 assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1592 if (isFixed) {
1593 FrameReg = RegInfo->getFrameRegister(MF);
1594 Offset = FPOffset;
1595 } else if (hasMovingSP) {
1596 assert(RegInfo->hasBasePointer(MF) &&
1597 "VLAs and dynamic stack alignment, but missing base pointer!");
1598 FrameReg = RegInfo->getBaseRegister();
// Base-pointer addressing is unaffected by call-frame SP adjustment.
1599 Offset -= SPAdj;
1600 }
1601 return Offset;
1602 }
1603
1604 // If there is a frame pointer, use it when we can.
1605 if (hasFP(MF) && AFI->hasStackFrame()) {
1606 // Use frame pointer to reference fixed objects. Use it for locals if
1607 // there are VLAs (and thus the SP isn't reliable as a base).
1608 if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1609 FrameReg = RegInfo->getFrameRegister(MF);
1610 return FPOffset;
1611 } else if (hasMovingSP) {
1612 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
1613 if (AFI->isThumb2Function()) {
1614 // Try to use the frame pointer if we can, else use the base pointer
1615 // since it's available. This is handy for the emergency spill slot, in
1616 // particular.
1617 if (FPOffset >= -255 && FPOffset < 0) {
1618 FrameReg = RegInfo->getFrameRegister(MF);
1619 return FPOffset;
1620 }
1621 }
1622 } else if (AFI->isThumbFunction()) {
1623 // Prefer SP to base pointer, if the offset is suitably aligned and in
1624 // range as the effective range of the immediate offset is bigger when
1625 // basing off SP.
1626 // Use add <rd>, sp, #<imm8>
1627 // ldr <rd>, [sp, #<imm8>]
1628 if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1629 return Offset;
1630 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1631 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1632 if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1633 FrameReg = RegInfo->getFrameRegister(MF);
1634 return FPOffset;
1635 }
1636 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1637 // Otherwise, use SP or FP, whichever is closer to the stack slot.
1638 FrameReg = RegInfo->getFrameRegister(MF);
1639 return FPOffset;
1640 }
1641 }
1642 // Use the base pointer if we have one.
1643 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1644 // That can happen if we forced a base pointer for a large call frame.
1645 if (RegInfo->hasBasePointer(MF)) {
1646 FrameReg = RegInfo->getBaseRegister();
1647 Offset -= SPAdj;
1648 }
1649 return Offset;
1650}
1651
// Spill the callee-saved registers selected by Func, using StmOpc for
// multi-register stores (push/vpush-style) and StrOpc for a lone register
// (store with SP writeback). Registers are processed from the end of CSI
// in groups so that, when NoGap is set, each emitted register list is
// consecutive (lists like vpush cannot contain holes).
// NOTE(review): this rendering has dropped lines — the parameter lines
// 1653-1654 (insertion iterator and the CSI array), declarations around
// 1659-1660 and 1666, and the trailing BuildMI operands (e.g. predOps on
// lines 1703-1704, 1711, 1713) — verify against the upstream source.
1652void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
1655 unsigned StmOpc, unsigned StrOpc,
1656 bool NoGap,
1657 function_ref<bool(unsigned)> Func) const {
1658 MachineFunction &MF = *MBB.getParent();
1661
1662 DebugLoc DL;
1663
1664 using RegAndKill = std::pair<unsigned, bool>;
1665
1667 unsigned i = CSI.size();
1668 while (i != 0) {
1669 unsigned LastReg = 0;
// Collect one run of registers accepted by Func (stopping at a gap when
// NoGap requires consecutive encodings).
1670 for (; i != 0; --i) {
1671 Register Reg = CSI[i-1].getReg();
1672 if (!Func(Reg))
1673 continue;
1674
1675 const MachineRegisterInfo &MRI = MF.getRegInfo();
1676 bool isLiveIn = MRI.isLiveIn(Reg);
1677 if (!isLiveIn && !MRI.isReserved(Reg))
1678 MBB.addLiveIn(Reg);
1679 // If NoGap is true, push consecutive registers and then leave the rest
1680 // for other instructions. e.g.
1681 // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
1682 if (NoGap && LastReg && LastReg != Reg-1)
1683 break;
1684 LastReg = Reg;
1685 // Do not set a kill flag on values that are also marked as live-in. This
1686 // happens with the @llvm-returnaddress intrinsic and with arguments
1687 // passed in callee saved registers.
1688 // Omitting the kill flags is conservatively correct even if the live-in
1689 // is not used after all.
1690 Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1691 }
1692
1693 if (Regs.empty())
1694 continue;
1695
// Register lists must be in ascending encoding order for STM/VPUSH.
1696 llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1697 return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1698 });
1699
// Multi-register run (or no single-store opcode available): one STM/VPUSH.
1700 if (Regs.size() > 1 || StrOpc== 0) {
1701 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1702 .addReg(ARM::SP)
1705 for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1706 MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
// Single register: use the store with writeback, pre-decrementing SP by 4.
1707 } else if (Regs.size() == 1) {
1708 BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1709 .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1710 .addReg(ARM::SP)
1712 .addImm(-4)
1714 }
1715 Regs.clear();
1716
1717 // Put any subsequent vpush instructions before this one: they will refer to
1718 // higher register numbers so need to be pushed first in order to preserve
1719 // monotonicity.
1720 if (MI != MBB.begin())
1721 --MI;
1722 }
1723}
1724
// Reload the callee-saved registers selected by Func, using LdmOpc for
// multi-register loads (pop/vpop-style) and LdrOpc for a lone register
// (load with SP writeback). Mirrors emitPushInst: registers are processed
// from the end of CSI in consecutive runs when NoGap is set. When it is
// safe (see the big condition below), LR is popped directly into PC and
// the return instruction is folded into the LDM.
// NOTE(review): this rendering has dropped lines — parameter lines
// 1726-1727, declarations around 1732-1734 and 1741-1742 (AFI, TRI, Regs,
// PushPopSplit), and some BuildMI continuations (e.g. 1799-1800, 1819) —
// verify against the upstream source.
1725void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1728 unsigned LdmOpc, unsigned LdrOpc,
1729 bool isVarArg, bool NoGap,
1730 function_ref<bool(unsigned)> Func) const {
1731 MachineFunction &MF = *MBB.getParent();
1735 bool hasPAC = AFI->shouldSignReturnAddress();
1736 DebugLoc DL;
1737 bool isTailCall = false;
1738 bool isInterrupt = false;
1739 bool isTrap = false;
1740 bool isCmseEntry = false;
// Classify the block's terminator: these kinds of returns cannot have the
// pop-into-PC fold applied.
1743 if (MBB.end() != MI) {
1744 DL = MI->getDebugLoc();
1745 unsigned RetOpcode = MI->getOpcode();
1746 isTailCall =
1747 (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||
1748 RetOpcode == ARM::TCRETURNrinotr12);
1749 isInterrupt =
1750 RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1751 isTrap =
1752 RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1753 RetOpcode == ARM::tTRAP;
1754 isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
1755 }
1756
1758 unsigned i = CSI.size();
1759 while (i != 0) {
1760 unsigned LastReg = 0;
1761 bool DeleteRet = false;
1762 for (; i != 0; --i) {
1763 CalleeSavedInfo &Info = CSI[i-1];
1764 Register Reg = Info.getReg();
1765 if (!Func(Reg))
1766 continue;
1767
// Fold the return into the pop by loading LR's slot straight into PC,
// but only for a plain v5T+ return with no PAC, no stack-arg restore,
// and no split-R11 push/pop layout.
1768 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1769 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
1770 STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1771 (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
1772 PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
1773 Reg = ARM::PC;
1774 // Fold the return instruction into the LDM.
1775 DeleteRet = true;
1776 LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1777 }
1778
1779 // If NoGap is true, pop consecutive registers and then leave the rest
1780 // for other instructions. e.g.
1781 // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1782 if (NoGap && LastReg && LastReg != Reg-1)
1783 break;
1784
1785 LastReg = Reg;
1786 Regs.push_back(Reg);
1787 }
1788
1789 if (Regs.empty())
1790 continue;
1791
// Register lists must be in ascending encoding order for LDM/VPOP.
1792 llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1793 return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1794 });
1795
1796 if (Regs.size() > 1 || LdrOpc == 0) {
1797 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1798 .addReg(ARM::SP)
1801 for (unsigned Reg : Regs)
1802 MIB.addReg(Reg, getDefRegState(true));
1803 if (DeleteRet) {
1804 if (MI != MBB.end()) {
1805 MIB.copyImplicitOps(*MI);
1806 MI->eraseFromParent();
1807 }
1808 }
1809 MI = MIB;
1810 } else if (Regs.size() == 1) {
1811 // If we adjusted the reg to PC from LR above, switch it back here. We
1812 // only do that for LDM.
1813 if (Regs[0] == ARM::PC)
1814 Regs[0] = ARM::LR;
1816 BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1817 .addReg(ARM::SP, RegState::Define)
1818 .addReg(ARM::SP)
1820 // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1821 // that refactoring is complete (eventually).
1822 if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1823 MIB.addReg(0);
1825 } else
1826 MIB.addImm(4);
1827 MIB.add(predOps(ARMCC::AL));
1828 }
1829 Regs.clear();
1830
1831 // Put any subsequent vpop instructions after this one: they will refer to
1832 // higher register numbers so need to be popped afterwards.
1833 if (MI != MBB.end())
1834 ++MI;
1835 }
1836}
1837
1838/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1839/// starting from d8. Also insert stack realignment code and leave the stack
1840/// pointer pointing to the d8 spill slot.
///
/// NOTE(review): the static function's signature lines (file lines
/// 1841-1842 and 1844: return type, MBB/MI parameters, CSI array) and some
/// BuildMI continuation lines (e.g. predOps at 1891, 1909, 1927-1928,
/// 1946-1947, 1961, 1974) are not visible in this rendering — verify
/// against the upstream source.
1843 unsigned NumAlignedDPRCS2Regs,
1845 const TargetRegisterInfo *TRI) {
1846 MachineFunction &MF = *MBB.getParent();
1848 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1850 MachineFrameInfo &MFI = MF.getFrameInfo();
1851
1852 // Mark the D-register spill slots as properly aligned. Since MFI computes
1853 // stack slot layout backwards, this can actually mean that the d-reg stack
1854 // slot offsets can be wrong. The offset for d8 will always be correct.
1855 for (const CalleeSavedInfo &I : CSI) {
1856 unsigned DNum = I.getReg() - ARM::D8;
1857 if (DNum > NumAlignedDPRCS2Regs - 1)
1858 continue;
1859 int FI = I.getFrameIdx();
1860 // The even-numbered registers will be 16-byte aligned, the odd-numbered
1861 // registers will be 8-byte aligned.
1862 MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
1863
1864 // The stack slot for D8 needs to be maximally aligned because this is
1865 // actually the point where we align the stack pointer. MachineFrameInfo
1866 // computes all offsets relative to the incoming stack pointer which is a
1867 // bit weird when realigning the stack. Any extra padding for this
1868 // over-alignment is not realized because the code inserted below adjusts
1869 // the stack pointer by numregs * 8 before aligning the stack pointer.
1870 if (DNum == 0)
1871 MFI.setObjectAlignment(FI, MFI.getMaxAlign());
1872 }
1873
1874 // Move the stack pointer to the d8 spill slot, and align it at the same
1875 // time. Leave the stack slot address in the scratch register r4.
1876 //
1877 // sub r4, sp, #numregs * 8
1878 // bic r4, r4, #align - 1
1879 // mov sp, r4
1880 //
1881 bool isThumb = AFI->isThumbFunction();
1882 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1883 AFI->setShouldRestoreSPFromFP(true);
1884
1885 // sub r4, sp, #numregs * 8
1886 // The immediate is <= 64, so it doesn't need any special encoding.
1887 unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1888 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1889 .addReg(ARM::SP)
1890 .addImm(8 * NumAlignedDPRCS2Regs)
1892 .add(condCodeOp());
1893
1894 Align MaxAlign = MF.getFrameInfo().getMaxAlign();
1895 // We must set parameter MustBeSingleInstruction to true, since
1896 // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1897 // stack alignment. Luckily, this can always be done since all ARM
1898 // architecture versions that support Neon also support the BFC
1899 // instruction.
1900 emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1901
1902 // mov sp, r4
1903 // The stack pointer must be adjusted before spilling anything, otherwise
1904 // the stack slots could be clobbered by an interrupt handler.
1905 // Leave r4 live, it is used below.
1906 Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1907 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1908 .addReg(ARM::R4)
1910 if (!isThumb)
1911 MIB.add(condCodeOp());
1912
1913 // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1914 // r4 holds the stack slot address.
1915 unsigned NextReg = ARM::D8;
1916
1917 // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1918 // The writeback is only needed when emitting two vst1.64 instructions.
1919 if (NumAlignedDPRCS2Regs >= 6) {
1920 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1921 &ARM::QQPRRegClass);
1922 MBB.addLiveIn(SupReg);
1923 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1924 .addReg(ARM::R4, RegState::Kill)
1925 .addImm(16)
1926 .addReg(NextReg)
1929 NextReg += 4;
1930 NumAlignedDPRCS2Regs -= 4;
1931 }
1932
1933 // We won't modify r4 beyond this point. It currently points to the next
1934 // register to be spilled.
1935 unsigned R4BaseReg = NextReg;
1936
1937 // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1938 if (NumAlignedDPRCS2Regs >= 4) {
1939 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1940 &ARM::QQPRRegClass);
1941 MBB.addLiveIn(SupReg);
1942 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1943 .addReg(ARM::R4)
1944 .addImm(16)
1945 .addReg(NextReg)
1948 NextReg += 4;
1949 NumAlignedDPRCS2Regs -= 4;
1950 }
1951
1952 // 16-byte aligned vst1.64 with 2 d-regs.
1953 if (NumAlignedDPRCS2Regs >= 2) {
1954 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1955 &ARM::QPRRegClass);
1956 MBB.addLiveIn(SupReg);
1957 BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1958 .addReg(ARM::R4)
1959 .addImm(16)
1960 .addReg(SupReg)
1962 NextReg += 2;
1963 NumAlignedDPRCS2Regs -= 2;
1964 }
1965
1966 // Finally, use a vanilla vstr.64 for the odd last register.
1967 if (NumAlignedDPRCS2Regs) {
1968 MBB.addLiveIn(NextReg);
1969 // vstr.64 uses addrmode5 which has an offset scale of 4.
1970 BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1971 .addReg(NextReg)
1972 .addReg(ARM::R4)
1973 .addImm((NextReg - R4BaseReg) * 2)
1975 }
1976
1977 // The last spill instruction inserted should kill the scratch register r4.
// skipAlignedDPRCS2Spills relies on this kill flag to find the end of the
// spill sequence.
1978 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1979}
1980
1981/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1982/// iterator to the following instruction.
1985 unsigned NumAlignedDPRCS2Regs) {
1986 // sub r4, sp, #numregs * 8
1987 // bic r4, r4, #align - 1
1988 // mov sp, r4
1989 ++MI; ++MI; ++MI;
1990 assert(MI->mayStore() && "Expecting spill instruction");
1991
1992 // These switches all fall through.
1993 switch(NumAlignedDPRCS2Regs) {
1994 case 7:
1995 ++MI;
1996 assert(MI->mayStore() && "Expecting spill instruction");
1997 [[fallthrough]];
1998 default:
1999 ++MI;
2000 assert(MI->mayStore() && "Expecting spill instruction");
2001 [[fallthrough]];
2002 case 1:
2003 case 2:
2004 case 4:
2005 assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
2006 ++MI;
2007 }
2008 return MI;
2009}
2010
2011/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
2012/// starting from d8. These instructions are assumed to execute while the
2013/// stack is still aligned, unlike the code inserted by emitPopInst.
2016 unsigned NumAlignedDPRCS2Regs,
2018 const TargetRegisterInfo *TRI) {
2019 MachineFunction &MF = *MBB.getParent();
2021 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
2023
2024 // Find the frame index assigned to d8.
2025 int D8SpillFI = 0;
2026 for (const CalleeSavedInfo &I : CSI)
2027 if (I.getReg() == ARM::D8) {
2028 D8SpillFI = I.getFrameIdx();
2029 break;
2030 }
2031
2032 // Materialize the address of the d8 spill slot into the scratch register r4.
2033 // This can be fairly complicated if the stack frame is large, so just use
2034 // the normal frame index elimination mechanism to do it. This code runs as
2035 // the initial part of the epilog where the stack and base pointers haven't
2036 // been changed yet.
2037 bool isThumb = AFI->isThumbFunction();
2038 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
2039
2040 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
2041 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
2042 .addFrameIndex(D8SpillFI)
2043 .addImm(0)
2045 .add(condCodeOp());
2046
2047 // Now restore NumAlignedDPRCS2Regs registers starting from d8.
2048 unsigned NextReg = ARM::D8;
2049
2050 // 16-byte aligned vld1.64 with 4 d-regs and writeback.
2051 if (NumAlignedDPRCS2Regs >= 6) {
2052 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
2053 &ARM::QQPRRegClass);
2054 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
2055 .addReg(ARM::R4, RegState::Define)
2056 .addReg(ARM::R4, RegState::Kill)
2057 .addImm(16)
2060 NextReg += 4;
2061 NumAlignedDPRCS2Regs -= 4;
2062 }
2063
2064 // We won't modify r4 beyond this point. It currently points to the next
2065 // register to be spilled.
2066 unsigned R4BaseReg = NextReg;
2067
2068 // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
2069 if (NumAlignedDPRCS2Regs >= 4) {
2070 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
2071 &ARM::QQPRRegClass);
2072 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
2073 .addReg(ARM::R4)
2074 .addImm(16)
2077 NextReg += 4;
2078 NumAlignedDPRCS2Regs -= 4;
2079 }
2080
2081 // 16-byte aligned vld1.64 with 2 d-regs.
2082 if (NumAlignedDPRCS2Regs >= 2) {
2083 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
2084 &ARM::QPRRegClass);
2085 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
2086 .addReg(ARM::R4)
2087 .addImm(16)
2089 NextReg += 2;
2090 NumAlignedDPRCS2Regs -= 2;
2091 }
2092
2093 // Finally, use a vanilla vldr.64 for the remaining odd register.
2094 if (NumAlignedDPRCS2Regs)
2095 BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
2096 .addReg(ARM::R4)
2097 .addImm(2 * (NextReg - R4BaseReg))
2099
2100 // Last store kills r4.
2101 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
2102}
2103
2107 if (CSI.empty())
2108 return false;
2109
2110 MachineFunction &MF = *MBB.getParent();
2114 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
2115
2116 unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
2117 unsigned PushOneOpc = AFI->isThumbFunction() ?
2118 ARM::t2STR_PRE : ARM::STR_PRE_IMM;
2119 unsigned FltOpc = ARM::VSTMDDB_UPD;
2120 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2121 // Compute PAC in R12.
2122 if (AFI->shouldSignReturnAddress()) {
2123 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
2125 }
2126 // Save the non-secure floating point context.
2127 if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
2128 return C.getReg() == ARM::FPCXTNS;
2129 })) {
2130 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
2131 ARM::SP)
2132 .addReg(ARM::SP)
2133 .addImm(-4)
2135 }
2136
2137 auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
2138 RegInfo](unsigned Reg, SpillArea TestArea) {
2139 return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
2140 TestArea;
2141 };
2142 auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
2143 return CheckRegArea(Reg, SpillArea::GPRCS1);
2144 };
2145 auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
2146 return CheckRegArea(Reg, SpillArea::GPRCS2);
2147 };
2148 auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
2149 return CheckRegArea(Reg, SpillArea::DPRCS1);
2150 };
2151 auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
2152 return CheckRegArea(Reg, SpillArea::GPRCS3);
2153 };
2154
2155 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS1);
2156 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS2);
2157 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, IsDPRCS1);
2158 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS3);
2159
2160 // The code above does not insert spill code for the aligned DPRCS2 registers.
2161 // The stack realignment code will be inserted between the push instructions
2162 // and these spills.
2163 if (NumAlignedDPRCS2Regs)
2164 emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2165
2166 return true;
2167}
2168
2172 if (CSI.empty())
2173 return false;
2174
2175 MachineFunction &MF = *MBB.getParent();
2177 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
2178
2179 bool isVarArg = AFI->getArgRegsSaveSize() > 0;
2180 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2183
2184 // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
2185 // registers. Do that here instead.
2186 if (NumAlignedDPRCS2Regs)
2187 emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2188
2189 unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
2190 unsigned LdrOpc =
2191 AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
2192 unsigned FltOpc = ARM::VLDMDIA_UPD;
2193
2194 auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
2195 RegInfo](unsigned Reg, SpillArea TestArea) {
2196 return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
2197 TestArea;
2198 };
2199 auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
2200 return CheckRegArea(Reg, SpillArea::GPRCS1);
2201 };
2202 auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
2203 return CheckRegArea(Reg, SpillArea::GPRCS2);
2204 };
2205 auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
2206 return CheckRegArea(Reg, SpillArea::DPRCS1);
2207 };
2208 auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
2209 return CheckRegArea(Reg, SpillArea::GPRCS3);
2210 };
2211
2212 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS3);
2213 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, IsDPRCS1);
2214 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS2);
2215 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS1);
2216
2217 return true;
2218}
2219
2220// FIXME: Make generic?
2222 const ARMBaseInstrInfo &TII) {
2223 unsigned FnSize = 0;
2224 for (auto &MBB : MF) {
2225 for (auto &MI : MBB)
2226 FnSize += TII.getInstSizeInBytes(MI);
2227 }
2228 if (MF.getJumpTableInfo())
2229 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2230 FnSize += Table.MBBs.size() * 4;
2231 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2232 return FnSize;
2233}
2234
2235/// estimateRSStackSizeLimit - Look at each instruction that references stack
2236/// frames and return the stack size limit beyond which some of these
2237/// instructions will require a scratch register during their expansion later.
2238// FIXME: Move to TII?
2240 const TargetFrameLowering *TFI,
2241 bool &HasNonSPFrameIndex) {
2242 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2243 const ARMBaseInstrInfo &TII =
2244 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2246 unsigned Limit = (1 << 12) - 1;
2247 for (auto &MBB : MF) {
2248 for (auto &MI : MBB) {
2249 if (MI.isDebugInstr())
2250 continue;
2251 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2252 if (!MI.getOperand(i).isFI())
2253 continue;
2254
2255 // When using ADDri to get the address of a stack object, 255 is the
2256 // largest offset guaranteed to fit in the immediate offset.
2257 if (MI.getOpcode() == ARM::ADDri) {
2258 Limit = std::min(Limit, (1U << 8) - 1);
2259 break;
2260 }
2261 // t2ADDri will not require an extra register, it can reuse the
2262 // destination.
2263 if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
2264 break;
2265
2266 const MCInstrDesc &MCID = MI.getDesc();
2267 const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
2268 if (RegClass && !RegClass->contains(ARM::SP))
2269 HasNonSPFrameIndex = true;
2270
2271 // Otherwise check the addressing mode.
2272 switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
2274 case ARMII::AddrMode2:
2275 // Default 12 bit limit.
2276 break;
2277 case ARMII::AddrMode3:
2279 Limit = std::min(Limit, (1U << 8) - 1);
2280 break;
2282 Limit = std::min(Limit, ((1U << 8) - 1) * 2);
2283 break;
2284 case ARMII::AddrMode5:
2287 Limit = std::min(Limit, ((1U << 8) - 1) * 4);
2288 break;
2290 // i12 supports only positive offset so these will be converted to
2291 // i8 opcodes. See llvm::rewriteT2FrameIndex.
2292 if (TFI->hasFP(MF) && AFI->hasStackFrame())
2293 Limit = std::min(Limit, (1U << 8) - 1);
2294 break;
2295 case ARMII::AddrMode4:
2296 case ARMII::AddrMode6:
2297 // Addressing modes 4 & 6 (load/store) instructions can't encode an
2298 // immediate offset for stack references.
2299 return 0;
2301 Limit = std::min(Limit, ((1U << 7) - 1) * 1);
2302 break;
2304 Limit = std::min(Limit, ((1U << 7) - 1) * 2);
2305 break;
2307 Limit = std::min(Limit, ((1U << 7) - 1) * 4);
2308 break;
2309 default:
2310 llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
2311 }
2312 break; // At most one FI per instruction
2313 }
2314 }
2315 }
2316
2317 return Limit;
2318}
2319
2320// In functions that realign the stack, it can be an advantage to spill the
2321// callee-saved vector registers after realigning the stack. The vst1 and vld1
2322// instructions take alignment hints that can improve performance.
2323static void
2325 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2327 return;
2328
2329 // Naked functions don't spill callee-saved registers.
2330 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
2331 return;
2332
2333 // We are planning to use NEON instructions vst1 / vld1.
2334 if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2335 return;
2336
2337 // Don't bother if the default stack alignment is sufficiently high.
2339 return;
2340
2341 // Aligned spills require stack realignment.
2342 if (!static_cast<const ARMBaseRegisterInfo *>(
2344 return;
2345
2346 // We always spill contiguous d-registers starting from d8. Count how many
2347 // needs spilling. The register allocator will almost always use the
2348 // callee-saved registers in order, but it can happen that there are holes in
2349 // the range. Registers above the hole will be spilled to the standard DPRCS
2350 // area.
2351 unsigned NumSpills = 0;
2352 for (; NumSpills < 8; ++NumSpills)
2353 if (!SavedRegs.test(ARM::D8 + NumSpills))
2354 break;
2355
2356 // Don't do this for just one d-register. It's not worth it.
2357 if (NumSpills < 2)
2358 return;
2359
2360 // Spill the first NumSpills D-registers after realigning the stack.
2361 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2362
2363 // A scratch register is required for the vst1 / vld1 instructions.
2364 SavedRegs.set(ARM::R4);
2365}
2366
2368 // For CMSE entry functions, we want to save the FPCXT_NS immediately
2369 // upon function entry (resp. restore it immmediately before return)
2370 if (STI.hasV8_1MMainlineOps() &&
2372 return false;
2373
2374 // We are disabling shrinkwrapping for now when PAC is enabled, as
2375 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2376 // generated. A follow-up patch will fix this in a more performant manner.
2378 true /* SpillsLR */))
2379 return false;
2380
2381 return true;
2382}
2383
2385 const MachineFunction &MF) const {
2386 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2387 return Subtarget.createAAPCSFrameChain() && hasFP(MF);
2388}
2389
2390// Thumb1 may require a spill when storing to a frame index through FP (or any
2391// access with execute-only), for cases where FP is a high register (R11). This
2392// scans the function for cases where this may happen.
2394 const TargetFrameLowering &TFI) {
2395 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2396 if (!AFI->isThumb1OnlyFunction())
2397 return false;
2398
2399 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
2400 for (const auto &MBB : MF)
2401 for (const auto &MI : MBB)
2402 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
2403 STI.genExecuteOnly())
2404 for (const auto &Op : MI.operands())
2405 if (Op.isFI()) {
2406 Register Reg;
2407 TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
2408 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2409 return true;
2410 }
2411 return false;
2412}
2413
2415 BitVector &SavedRegs,
2416 RegScavenger *RS) const {
2418 // This tells PEI to spill the FP as if it is any other callee-save register
2419 // to take advantage the eliminateFrameIndex machinery. This also ensures it
2420 // is spilled in the order specified by getCalleeSavedRegs() to make it easier
2421 // to combine multiple loads / stores.
2422 bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF)) &&
2424 bool CS1Spilled = false;
2425 bool LRSpilled = false;
2426 unsigned NumGPRSpills = 0;
2427 unsigned NumFPRSpills = 0;
2428 SmallVector<unsigned, 4> UnspilledCS1GPRs;
2429 SmallVector<unsigned, 4> UnspilledCS2GPRs;
2430 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
2432 const ARMBaseInstrInfo &TII =
2433 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2435 MachineFrameInfo &MFI = MF.getFrameInfo();
2438 (void)TRI; // Silence unused warning in non-assert builds.
2439 Register FramePtr = RegInfo->getFrameRegister(MF);
2442
2443 // Spill R4 if Thumb2 function requires stack realignment - it will be used as
2444 // scratch register. Also spill R4 if Thumb2 function has varsized objects,
2445 // since it's not always possible to restore sp from fp in a single
2446 // instruction.
2447 // FIXME: It will be better just to find spare register here.
2448 if (AFI->isThumb2Function() &&
2449 (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
2450 SavedRegs.set(ARM::R4);
2451
2452 // If a stack probe will be emitted, spill R4 and LR, since they are
2453 // clobbered by the stack probe call.
2454 // This estimate should be a safe, conservative estimate. The actual
2455 // stack probe is enabled based on the size of the local objects;
2456 // this estimate also includes the varargs store size.
2457 if (STI.isTargetWindows() &&
2458 WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
2459 SavedRegs.set(ARM::R4);
2460 SavedRegs.set(ARM::LR);
2461 }
2462
2463 if (AFI->isThumb1OnlyFunction()) {
2464 // Spill LR if Thumb1 function uses variable length argument lists.
2465 if (AFI->getArgRegsSaveSize() > 0)
2466 SavedRegs.set(ARM::LR);
2467
2468 // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
2469 // requires stack alignment. We don't know for sure what the stack size
2470 // will be, but for this, an estimate is good enough. If there anything
2471 // changes it, it'll be a spill, which implies we've used all the registers
2472 // and so R4 is already used, so not marking it here will be OK.
2473 // FIXME: It will be better just to find spare register here.
2474 if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
2475 MFI.estimateStackSize(MF) > 508)
2476 SavedRegs.set(ARM::R4);
2477 }
2478
2479 // See if we can spill vector registers to aligned stack.
2480 checkNumAlignedDPRCS2Regs(MF, SavedRegs);
2481
2482 // Spill the BasePtr if it's used.
2483 if (RegInfo->hasBasePointer(MF))
2484 SavedRegs.set(RegInfo->getBaseRegister());
2485
2486 // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
2487 if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
2488 CanEliminateFrame = false;
2489
2490 // When return address signing is enabled R12 is treated as callee-saved.
2491 if (AFI->shouldSignReturnAddress())
2492 CanEliminateFrame = false;
2493
2494 // Don't spill FP if the frame can be eliminated. This is determined
2495 // by scanning the callee-save registers to see if any is modified.
2496 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2497 for (unsigned i = 0; CSRegs[i]; ++i) {
2498 unsigned Reg = CSRegs[i];
2499 bool Spilled = false;
2500 if (SavedRegs.test(Reg)) {
2501 Spilled = true;
2502 CanEliminateFrame = false;
2503 }
2504
2505 if (!ARM::GPRRegClass.contains(Reg)) {
2506 if (Spilled) {
2507 if (ARM::SPRRegClass.contains(Reg))
2508 NumFPRSpills++;
2509 else if (ARM::DPRRegClass.contains(Reg))
2510 NumFPRSpills += 2;
2511 else if (ARM::QPRRegClass.contains(Reg))
2512 NumFPRSpills += 4;
2513 }
2514 continue;
2515 }
2516
2517 if (Spilled) {
2518 NumGPRSpills++;
2519
2520 if (PushPopSplit != ARMSubtarget::SplitR7) {
2521 if (Reg == ARM::LR)
2522 LRSpilled = true;
2523 CS1Spilled = true;
2524 continue;
2525 }
2526
2527 // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
2528 switch (Reg) {
2529 case ARM::LR:
2530 LRSpilled = true;
2531 [[fallthrough]];
2532 case ARM::R0: case ARM::R1:
2533 case ARM::R2: case ARM::R3:
2534 case ARM::R4: case ARM::R5:
2535 case ARM::R6: case ARM::R7:
2536 CS1Spilled = true;
2537 break;
2538 default:
2539 break;
2540 }
2541 } else {
2542 if (PushPopSplit != ARMSubtarget::SplitR7) {
2543 UnspilledCS1GPRs.push_back(Reg);
2544 continue;
2545 }
2546
2547 switch (Reg) {
2548 case ARM::R0: case ARM::R1:
2549 case ARM::R2: case ARM::R3:
2550 case ARM::R4: case ARM::R5:
2551 case ARM::R6: case ARM::R7:
2552 case ARM::LR:
2553 UnspilledCS1GPRs.push_back(Reg);
2554 break;
2555 default:
2556 UnspilledCS2GPRs.push_back(Reg);
2557 break;
2558 }
2559 }
2560 }
2561
2562 bool ForceLRSpill = false;
2563 if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
2564 unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
2565 // Force LR to be spilled if the Thumb function size is > 2048. This enables
2566 // use of BL to implement far jump.
2567 if (FnSize >= (1 << 11)) {
2568 CanEliminateFrame = false;
2569 ForceLRSpill = true;
2570 }
2571 }
2572
2573 // If any of the stack slot references may be out of range of an immediate
2574 // offset, make sure a register (or a spill slot) is available for the
2575 // register scavenger. Note that if we're indexing off the frame pointer, the
2576 // effective stack size is 4 bytes larger since the FP points to the stack
2577 // slot of the previous FP. Also, if we have variable sized objects in the
2578 // function, stack slot references will often be negative, and some of
2579 // our instructions are positive-offset only, so conservatively consider
2580 // that case to want a spill slot (or register) as well. Similarly, if
2581 // the function adjusts the stack pointer during execution and the
2582 // adjustments aren't already part of our stack size estimate, our offset
2583 // calculations may be off, so be conservative.
2584 // FIXME: We could add logic to be more precise about negative offsets
2585 // and which instructions will need a scratch register for them. Is it
2586 // worth the effort and added fragility?
2587 unsigned EstimatedStackSize =
2588 MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
2589
2590 // Determine biggest (positive) SP offset in MachineFrameInfo.
2591 int MaxFixedOffset = 0;
2592 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
2593 int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
2594 MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
2595 }
2596
2597 bool HasFP = hasFP(MF);
2598 if (HasFP) {
2599 if (AFI->hasStackFrame())
2600 EstimatedStackSize += 4;
2601 } else {
2602 // If FP is not used, SP will be used to access arguments, so count the
2603 // size of arguments into the estimation.
2604 EstimatedStackSize += MaxFixedOffset;
2605 }
2606 EstimatedStackSize += 16; // For possible paddings.
2607
2608 unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
2609 bool HasNonSPFrameIndex = false;
2610 if (AFI->isThumb1OnlyFunction()) {
2611 // For Thumb1, don't bother to iterate over the function. The only
2612 // instruction that requires an emergency spill slot is a store to a
2613 // frame index.
2614 //
2615 // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
2616 // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
2617 // a 5-bit unsigned immediate.
2618 //
2619 // We could try to check if the function actually contains a tSTRspi
2620 // that might need the spill slot, but it's not really important.
2621 // Functions with VLAs or extremely large call frames are rare, and
2622 // if a function is allocating more than 1KB of stack, an extra 4-byte
2623 // slot probably isn't relevant.
2624 //
2625 // A special case is the scenario where r11 is used as FP, where accesses
2626 // to a frame index will require its value to be moved into a low reg.
2627 // This is handled later on, once we are able to determine if we have any
2628 // fp-relative accesses.
2629 if (RegInfo->hasBasePointer(MF))
2630 EstimatedRSStackSizeLimit = (1U << 5) * 4;
2631 else
2632 EstimatedRSStackSizeLimit = (1U << 8) * 4;
2633 EstimatedRSFixedSizeLimit = (1U << 5) * 4;
2634 } else {
2635 EstimatedRSStackSizeLimit =
2636 estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
2637 EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
2638 }
2639 // Final estimate of whether sp or bp-relative accesses might require
2640 // scavenging.
2641 bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
2642
2643 // If the stack pointer moves and we don't have a base pointer, the
2644 // estimate logic doesn't work. The actual offsets might be larger when
2645 // we're constructing a call frame, or we might need to use negative
2646 // offsets from fp.
2647 bool HasMovingSP = MFI.hasVarSizedObjects() ||
2648 (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
2649 bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
2650
2651 // If we have a frame pointer, we assume arguments will be accessed
2652 // relative to the frame pointer. Check whether fp-relative accesses to
2653 // arguments require scavenging.
2654 //
2655 // We could do slightly better on Thumb1; in some cases, an sp-relative
2656 // offset would be legal even though an fp-relative offset is not.
2657 int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
2658 bool HasLargeArgumentList =
2659 HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
2660
2661 bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
2662 HasLargeArgumentList || HasNonSPFrameIndex;
2663 LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
2664 << "; EstimatedStack: " << EstimatedStackSize
2665 << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
2666 << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
2667 if (BigFrameOffsets ||
2668 !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
2669 AFI->setHasStackFrame(true);
2670
2671 if (HasFP) {
2672 SavedRegs.set(FramePtr);
2673 // If the frame pointer is required by the ABI, also spill LR so that we
2674 // emit a complete frame record.
2675 if ((requiresAAPCSFrameRecord(MF) ||
2677 !LRSpilled) {
2678 SavedRegs.set(ARM::LR);
2679 LRSpilled = true;
2680 NumGPRSpills++;
2681 auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
2682 if (LRPos != UnspilledCS1GPRs.end())
2683 UnspilledCS1GPRs.erase(LRPos);
2684 }
2685 auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
2686 if (FPPos != UnspilledCS1GPRs.end())
2687 UnspilledCS1GPRs.erase(FPPos);
2688 NumGPRSpills++;
2689 if (FramePtr == ARM::R7)
2690 CS1Spilled = true;
2691 }
2692
2693 // This is the number of extra spills inserted for callee-save GPRs which
2694 // would not otherwise be used by the function. When greater than zero it
2695 // guaranteees that it is possible to scavenge a register to hold the
2696 // address of a stack slot. On Thumb1, the register must be a valid operand
2697 // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
2698 // or lr.
2699 //
2700 // If we don't insert a spill, we instead allocate an emergency spill
2701 // slot, which can be used by scavenging to spill an arbitrary register.
2702 //
2703 // We currently don't try to figure out whether any specific instruction
2704 // requires scavening an additional register.
2705 unsigned NumExtraCSSpill = 0;
2706
2707 if (AFI->isThumb1OnlyFunction()) {
2708 // For Thumb1-only targets, we need some low registers when we save and
2709 // restore the high registers (which aren't allocatable, but could be
2710 // used by inline assembly) because the push/pop instructions can not
2711 // access high registers. If necessary, we might need to push more low
2712 // registers to ensure that there is at least one free that can be used
2713 // for the saving & restoring, and preferably we should ensure that as
2714 // many as are needed are available so that fewer push/pop instructions
2715 // are required.
2716
2717 // Low registers which are not currently pushed, but could be (r4-r7).
2718 SmallVector<unsigned, 4> AvailableRegs;
2719
2720 // Unused argument registers (r0-r3) can be clobbered in the prologue for
2721 // free.
2722 int EntryRegDeficit = 0;
2723 for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
2724 if (!MF.getRegInfo().isLiveIn(Reg)) {
2725 --EntryRegDeficit;
2727 << printReg(Reg, TRI)
2728 << " is unused argument register, EntryRegDeficit = "
2729 << EntryRegDeficit << "\n");
2730 }
2731 }
2732
2733 // Unused return registers can be clobbered in the epilogue for free.
2734 int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
2736 << " return regs used, ExitRegDeficit = "
2737 << ExitRegDeficit << "\n");
2738
2739 int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
2740 LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
2741
2742 // r4-r6 can be used in the prologue if they are pushed by the first push
2743 // instruction.
2744 for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
2745 if (SavedRegs.test(Reg)) {
2746 --RegDeficit;
2747 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2748 << " is saved low register, RegDeficit = "
2749 << RegDeficit << "\n");
2750 } else {
2751 AvailableRegs.push_back(Reg);
2752 LLVM_DEBUG(
2753 dbgs()
2754 << printReg(Reg, TRI)
2755 << " is non-saved low register, adding to AvailableRegs\n");
2756 }
2757 }
2758
2759 // r7 can be used if it is not being used as the frame pointer.
2760 if (!HasFP || FramePtr != ARM::R7) {
2761 if (SavedRegs.test(ARM::R7)) {
2762 --RegDeficit;
2763 LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
2764 << RegDeficit << "\n");
2765 } else {
2766 AvailableRegs.push_back(ARM::R7);
2767 LLVM_DEBUG(
2768 dbgs()
2769 << "%r7 is non-saved low register, adding to AvailableRegs\n");
2770 }
2771 }
2772
2773 // Each of r8-r11 needs to be copied to a low register, then pushed.
2774 for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
2775 if (SavedRegs.test(Reg)) {
2776 ++RegDeficit;
2777 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2778 << " is saved high register, RegDeficit = "
2779 << RegDeficit << "\n");
2780 }
2781 }
2782
2783 // LR can only be used by PUSH, not POP, and can't be used at all if the
2784 // llvm.returnaddress intrinsic is used. This is only worth doing if we
2785 // are more limited at function entry than exit.
2786 if ((EntryRegDeficit > ExitRegDeficit) &&
2787 !(MF.getRegInfo().isLiveIn(ARM::LR) &&
2789 if (SavedRegs.test(ARM::LR)) {
2790 --RegDeficit;
2791 LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
2792 << RegDeficit << "\n");
2793 } else {
2794 AvailableRegs.push_back(ARM::LR);
2795 LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
2796 }
2797 }
2798
2799 // If there are more high registers that need pushing than low registers
2800 // available, push some more low registers so that we can use fewer push
2801 // instructions. This might not reduce RegDeficit all the way to zero,
2802 // because we can only guarantee that r4-r6 are available, but r8-r11 may
2803 // need saving.
2804 LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2805 for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2806 unsigned Reg = AvailableRegs.pop_back_val();
2807 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2808 << " to make up reg deficit\n");
2809 SavedRegs.set(Reg);
2810 NumGPRSpills++;
2811 CS1Spilled = true;
2812 assert(!MRI.isReserved(Reg) && "Should not be reserved");
2813 if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
2814 NumExtraCSSpill++;
2815 UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
2816 if (Reg == ARM::LR)
2817 LRSpilled = true;
2818 }
2819 LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2820 << "\n");
2821 }
2822
2823 // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2824 // restore LR in that case.
2825 bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2826
2827 // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
2828 // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
2829 if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2830 SavedRegs.set(ARM::LR);
2831 NumGPRSpills++;
2833 LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2834 if (LRPos != UnspilledCS1GPRs.end())
2835 UnspilledCS1GPRs.erase(LRPos);
2836
2837 ForceLRSpill = false;
2838 if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2839 !AFI->isThumb1OnlyFunction())
2840 NumExtraCSSpill++;
2841 }
2842
2843 // If stack and double are 8-byte aligned and we are spilling an odd number
2844 // of GPRs, spill one extra callee save GPR so we won't have to pad between
2845 // the integer and double callee save areas.
2846 LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2847 const Align TargetAlign = getStackAlign();
2848 if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
2849 if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
2850 for (unsigned Reg : UnspilledCS1GPRs) {
2851 // Don't spill high register if the function is thumb. In the case of
2852 // Windows on ARM, accept R11 (frame pointer)
2853 if (!AFI->isThumbFunction() ||
2854 (STI.isTargetWindows() && Reg == ARM::R11) ||
2855 isARMLowRegister(Reg) ||
2856 (Reg == ARM::LR && !ExpensiveLRRestore)) {
2857 SavedRegs.set(Reg);
2858 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2859 << " to make up alignment\n");
2860 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
2861 !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
2862 NumExtraCSSpill++;
2863 break;
2864 }
2865 }
2866 } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
2867 unsigned Reg = UnspilledCS2GPRs.front();
2868 SavedRegs.set(Reg);
2869 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2870 << " to make up alignment\n");
2871 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
2872 NumExtraCSSpill++;
2873 }
2874 }
2875
2876 // Estimate if we might need to scavenge registers at some point in order
2877 // to materialize a stack offset. If so, either spill one additional
2878 // callee-saved register or reserve a special spill slot to facilitate
2879 // register scavenging. Thumb1 needs a spill slot for stack pointer
2880 // adjustments and for frame index accesses when FP is high register,
2881 // even when the frame itself is small.
2882 unsigned RegsNeeded = 0;
2883 if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {
2884 RegsNeeded++;
2885 // With thumb1 execute-only we may need an additional register for saving
2886 // and restoring the CPSR.
2887 if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
2888 RegsNeeded++;
2889 }
2890
2891 if (RegsNeeded > NumExtraCSSpill) {
2892 // If any non-reserved CS register isn't spilled, just spill one or two
2893 // extra. That should take care of it!
2894 unsigned NumExtras = TargetAlign.value() / 4;
2896 while (NumExtras && !UnspilledCS1GPRs.empty()) {
2897 unsigned Reg = UnspilledCS1GPRs.pop_back_val();
2898 if (!MRI.isReserved(Reg) &&
2899 (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
2900 Extras.push_back(Reg);
2901 NumExtras--;
2902 }
2903 }
2904 // For non-Thumb1 functions, also check for hi-reg CS registers
2905 if (!AFI->isThumb1OnlyFunction()) {
2906 while (NumExtras && !UnspilledCS2GPRs.empty()) {
2907 unsigned Reg = UnspilledCS2GPRs.pop_back_val();
2908 if (!MRI.isReserved(Reg)) {
2909 Extras.push_back(Reg);
2910 NumExtras--;
2911 }
2912 }
2913 }
2914 if (NumExtras == 0) {
2915 for (unsigned Reg : Extras) {
2916 SavedRegs.set(Reg);
2917 if (!MRI.isPhysRegUsed(Reg))
2918 NumExtraCSSpill++;
2919 }
2920 }
2921 while ((RegsNeeded > NumExtraCSSpill) && RS) {
2922 // Reserve a slot closest to SP or frame pointer.
2923 LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
2924 const TargetRegisterClass &RC = ARM::GPRRegClass;
2925 unsigned Size = TRI->getSpillSize(RC);
2926 Align Alignment = TRI->getSpillAlign(RC);
2928 MFI.CreateStackObject(Size, Alignment, false));
2929 --RegsNeeded;
2930 }
2931 }
2932 }
2933
2934 if (ForceLRSpill)
2935 SavedRegs.set(ARM::LR);
2936 AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
2937}
2938
2940 MachineFrameInfo &MFI = MF.getFrameInfo();
2941 if (!MFI.isCalleeSavedInfoValid())
2942 return;
2943
2944 // Check if all terminators do not implicitly use LR. Then we can 'restore' LR
2945 // into PC so it is not live out of the return block: Clear the Restored bit
2946 // in that case.
2947 for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
2948 if (Info.getReg() != ARM::LR)
2949 continue;
2950 if (all_of(MF, [](const MachineBasicBlock &MBB) {
2951 return all_of(MBB.terminators(), [](const MachineInstr &Term) {
2952 return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
2953 Term.getOpcode() == ARM::t2LDMIA_RET ||
2954 Term.getOpcode() == ARM::tPOP_RET;
2955 });
2956 })) {
2957 Info.setRestored(false);
2958 break;
2959 }
2960 }
2961}
2962
2964 MachineFunction &MF, RegScavenger *RS) const {
2966 updateLRRestored(MF);
2967}
2968
2970 BitVector &SavedRegs) const {
2972
2973 // If we have the "returned" parameter attribute which guarantees that we
2974 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
2975 // record that fact for IPRA.
2976 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2977 if (AFI->getPreservesR0())
2978 SavedRegs.set(ARM::R0);
2979}
2980
2983 std::vector<CalleeSavedInfo> &CSI) const {
2984 // For CMSE entry functions, handle floating-point context as if it was a
2985 // callee-saved register.
2986 if (STI.hasV8_1MMainlineOps() &&
2988 CSI.emplace_back(ARM::FPCXTNS);
2989 CSI.back().setRestored(false);
2990 }
2991
2992 // For functions, which sign their return address, upon function entry, the
2993 // return address PAC is computed in R12. Treat R12 as a callee-saved register
2994 // in this case.
2995 const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
2996 if (AFI.shouldSignReturnAddress()) {
2997 // The order of register must match the order we push them, because the
2998 // PEI assigns frame indices in that order. That order depends on the
2999 // PushPopSplitVariation, there are only two cases which we use with return
3000 // address signing:
3001 switch (STI.getPushPopSplitVariation(MF)) {
3003 // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
3004 CSI.insert(find_if(CSI,
3005 [=](const auto &CS) {
3006 Register Reg = CS.getReg();
3007 return Reg == ARM::R10 || Reg == ARM::R11 ||
3008 Reg == ARM::R8 || Reg == ARM::R9 ||
3009 ARM::DPRRegClass.contains(Reg);
3010 }),
3011 CalleeSavedInfo(ARM::R12));
3012 break;
3014 // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
3015 // on the stack.
3016 CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
3017 break;
3020 "ABI-required frame pointers need a CSR split when signing return "
3021 "address.");
3022 CSI.insert(find_if(CSI,
3023 [=](const auto &CS) {
3024 Register Reg = CS.getReg();
3025 return Reg != ARM::LR;
3026 }),
3027 CalleeSavedInfo(ARM::R12));
3028 break;
3029 default:
3030 llvm_unreachable("Unexpected CSR split with return address signing");
3031 }
3032 }
3033
3034 return false;
3035}
3036
3039 static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
3040 NumEntries = std::size(FixedSpillOffsets);
3041 return FixedSpillOffsets;
3042}
3043
3044MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
3047 const ARMBaseInstrInfo &TII =
3048 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
3050 bool isARM = !AFI->isThumbFunction();
3051 DebugLoc dl = I->getDebugLoc();
3052 unsigned Opc = I->getOpcode();
3053 bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
3054 unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
3055
3056 assert(!AFI->isThumb1OnlyFunction() &&
3057 "This eliminateCallFramePseudoInstr does not support Thumb1!");
3058
3059 int PIdx = I->findFirstPredOperandIdx();
3060 ARMCC::CondCodes Pred = (PIdx == -1)
3061 ? ARMCC::AL
3062 : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
3063 unsigned PredReg = TII.getFramePred(*I);
3064
3065 if (!hasReservedCallFrame(MF)) {
3066 // Bail early if the callee is expected to do the adjustment.
3067 if (IsDestroy && CalleePopAmount != -1U)
3068 return MBB.erase(I);
3069
3070 // If we have alloca, convert as follows:
3071 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
3072 // ADJCALLSTACKUP -> add, sp, sp, amount
3073 unsigned Amount = TII.getFrameSize(*I);
3074 if (Amount != 0) {
3075 // We need to keep the stack aligned properly. To do this, we round the
3076 // amount of space needed for the outgoing arguments up to the next
3077 // alignment boundary.
3078 Amount = alignSPAdjust(Amount);
3079
3080 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
3081 emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
3082 Pred, PredReg);
3083 } else {
3084 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
3085 emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
3086 Pred, PredReg);
3087 }
3088 }
3089 } else if (CalleePopAmount != -1U) {
3090 // If the calling convention demands that the callee pops arguments from the
3091 // stack, we want to add it back if we have a reserved call frame.
3092 emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
3093 MachineInstr::NoFlags, Pred, PredReg);
3094 }
3095 return MBB.erase(I);
3096}
3097
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word. (The signature line was dropped from this listing; it is
/// confirmed by the member index: `static uint32_t
/// alignToARMConstant(uint32_t Value)`.)
static uint32_t alignToARMConstant(uint32_t Value) {
  unsigned Shifted = 0;

  if (Value == 0)
    return 0;

  // Normalize: shift left two bits at a time (rotations are by even amounts)
  // until one of the top two bits is set, remembering how far we shifted.
  while (!(Value & 0xC0000000)) {
    Value = Value << 2;
    Shifted += 2;
  }

  // Round the top byte up by one if any lower bits would be truncated.
  bool Carry = (Value & 0x00FFFFFF);
  Value = ((Value & 0xFF000000) >> 24) + Carry;

  // If rounding overflowed past 8 bits, drop the low two bits so the value
  // still fits an 8-bit payload at the next even rotation.
  if (Value & 0x0000100)
    Value = Value & 0x000001FC;

  // Shift the 8-bit payload back into its original position.
  if (Shifted > 24)
    Value = Value >> (Shifted - 24);
  else
    Value = Value << (24 - Shifted);

  return Value;
}
3126
// The stack limit in the TCB is set to this many bytes above the actual
// stack limit. (The definition line was dropped from this listing; the type
// is confirmed by the member index and the value by the comparison comment
// "When the frame size is less than 256 we just compare the stack boundary
// directly" guarding `AlignedStackSize < kSplitStackAvailable`.)
static const uint64_t kSplitStackAvailable = 256;
3131// Adjust the function prologue to enable split stacks. This currently only
3132// supports android and linux.
3133//
3134// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
3135// must be well defined in order to allow for consistent implementations of the
3136// __morestack helper function. The ABI is also not a normal ABI in that it
3137// doesn't follow the normal calling conventions because this allows the
3138// prologue of each function to be optimized further.
3139//
3140// Currently, the ABI looks like (when calling __morestack)
3141//
3142// * r4 holds the minimum stack size requested for this function call
3143// * r5 holds the stack size of the arguments to the function
3144// * the beginning of the function is 3 instructions after the call to
3145// __morestack
3146//
3147// Implementations of __morestack should use r4 to allocate a new stack, r5 to
3148// place the arguments on to the new stack, and the 3-instruction knowledge to
3149// jump directly to the body of the function when working on the new stack.
3150//
3151// An old (and possibly no longer compatible) implementation of __morestack for
3152// ARM can be found at [1].
3153//
3154// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
// NOTE(review): this listing appears to have dropped a number of lines from
// this function: its opening signature line
// (`void ARMFrameLowering::adjustForSegmentedStacks(`), the creation of the
// McrMBB / GetMBB / AllocMBB blocks, local declarations such as ARMFI,
// WalkList, MCP and NewCPV, and many `.add(predOps(ARMCC::AL))` operand
// lines on the BuildMI chains below. The code is kept exactly as listed;
// cross-check against upstream ARMFrameLowering.cpp before relying on it.
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  unsigned Opcode;
  unsigned CFIIndex;
  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
  bool Thumb = ST->isThumb();
  bool Thumb2 = ST->isThumb2();

  // Sadly, this currently doesn't support varargs, platforms other than
  // android/linux. Note that thumb1/thumb2 are support for android/linux.
  if (MF.getFunction().isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
    report_fatal_error("Segmented stacks not supported on this platform.");

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MCContext &Context = MF.getContext();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  DebugLoc DL;

  if (!MFI.needsSplitStackProlog())
    return;

  uint64_t StackSize = MFI.getStackSize();

  // Use R4 and R5 as scratch registers.
  // We save R4 and R5 before use and restore them before leaving the function.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
  uint64_t AlignedStackSize;

  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();

  // Grab everything that reaches PrologueMBB to update there liveness as well.
  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
  WalkList.push_back(&PrologueMBB);

  // Reverse CFG walk: collect every block from which PrologueMBB is
  // reachable, so their live-in sets can be kept consistent below.
  do {
    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
      if (BeforePrologueRegion.insert(PredBB).second)
        WalkList.push_back(PredBB);
    }
  } while (!WalkList.empty());

  // The order in that list is important.
  // The blocks will all be inserted before PrologueMBB using that order.
  // Therefore the block that should appear first in the CFG should appear
  // first in the list.
  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
                                      PostStackMBB};

  for (MachineBasicBlock *B : AddedBlocks)
    BeforePrologueRegion.insert(B);

  // Every block that reaches the prologue (including the new ones) must carry
  // the prologue's live-in registers.
  for (const auto &LI : PrologueMBB.liveins()) {
    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
      PredBB->addLiveIn(LI);
  }

  // Remove the newly added blocks from the list, since we know
  // we do not have to do the following updates for them.
  for (MachineBasicBlock *B : AddedBlocks) {
    BeforePrologueRegion.erase(B);
    MF.insert(PrologueMBB.getIterator(), B);
  }

  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
    // Make sure the LiveIns are still sorted and unique.
    // Replace the edges to PrologueMBB by edges to the sequences
    // we are about to add, but only update for immediate predecessors.
    if (MBB->isSuccessor(&PrologueMBB))
      MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
  }

  // The required stack size that is aligned to ARM constant criterion.
  AlignedStackSize = alignToARMConstant(StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;

  // We will use two of the callee save registers as scratch registers so we
  // need to save those registers onto the stack.
  // We will use SR0 to hold stack limit and SR1 to hold the stack size
  // requested and arguments for __morestack().
  // SR0: Scratch Register #0
  // SR1: Scratch Register #1
  // push {SR0, SR1}
  if (Thumb) {
    BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Emit the relevant DWARF information about the change in stack pointer as
  // well as where to find both r4 and r5 (the callee-save registers)
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
        nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
        nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // mov SR1, sp
  if (Thumb) {
    BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
        .addReg(ARM::SP)
  } else if (CompareStackPointer) {
    BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
        .addReg(ARM::SP)
        .add(condCodeOp());
  }

  // sub SR1, sp, #StackSize
  if (!CompareStackPointer && Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
          .add(condCodeOp())
          .addReg(ScratchReg1)
          .addImm(AlignedStackSize)
    } else {
      // Immediate too large for tSUBi8: materialize it in SR0 first, either
      // with a movi32 pseudo or via a constant pool load.
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(McrMBB, DL, TII.get(MovOp), ScratchReg0)
            .addImm(AlignedStackSize);
      } else {
        auto MBBI = McrMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
                                   AlignedStackSize);
      }
      BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
          .add(condCodeOp())
          .addReg(ScratchReg1)
          .addReg(ScratchReg0)
    }
  } else if (!CompareStackPointer) {
    if (AlignedStackSize < 256) {
      BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
          .addReg(ARM::SP)
          .addImm(AlignedStackSize)
          .add(condCodeOp());
    } else {
      auto MBBI = McrMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
                                 AlignedStackSize);
      BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
          .addReg(ARM::SP)
          .addReg(ScratchReg0)
          .add(condCodeOp());
    }
  }

  if (Thumb && ST->isThumb1Only()) {
    // Thumb1 has no coprocessor access: fetch the stack limit through the
    // external __STACK_LIMIT symbol instead of the TLS slot.
    if (ST->genExecuteOnly()) {
      BuildMI(GetMBB, DL, TII.get(MovOp), ScratchReg0)
          .addExternalSymbol("__STACK_LIMIT");
    } else {
      unsigned PCLabelId = ARMFI->createPICLabelUId();
          MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
      unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));

      // ldr SR0, [pc, offset(STACK_LIMIT)]
      BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
    }

    // ldr SR0, [SR0]
    BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(0)
  } else {
    // Get TLS base address from the coprocessor
    // mrc p15, #0, SR0, c13, c0, #3
    BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
            ScratchReg0)
        .addImm(15)
        .addImm(0)
        .addImm(13)
        .addImm(0)
        .addImm(3)

    // Use the last tls slot on android and a private field of the TCP on linux.
    assert(ST->isTargetAndroid() || ST->isTargetLinux());
    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;

    // Get the stack limit from the right offset
    // ldr SR0, [sr0, #4 * TlsOffset]
    BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
            ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(4 * TlsOffset)
  }

  // Compare stack limit with stack size requested.
  // cmp SR0, SR1
  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
  BuildMI(GetMBB, DL, TII.get(Opcode))
      .addReg(ScratchReg0)
      .addReg(ScratchReg1)

  // This jump is taken if StackLimit <= SP - stack required.
  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(GetMBB, DL, TII.get(Opcode))
      .addMBB(PostStackMBB)
      .addReg(ARM::CPSR);

  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack knows that the stack size requested is in SR0(r4)
  // and amount size of stack arguments is in SR1(r5).

  // Pass first argument for the __morestack by Scratch Register #0.
  // The amount size of stack required
  if (Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
          .add(condCodeOp())
          .addImm(AlignedStackSize)
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg0)
            .addImm(AlignedStackSize);
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
                                   AlignedStackSize);
      }
    }
  } else {
    if (AlignedStackSize < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
          .addImm(AlignedStackSize)
          .add(condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
                                 AlignedStackSize);
    }
  }

  // Pass second argument for the __morestack by Scratch Register #1.
  // The amount size of stack consumed to save function arguments.
  if (Thumb) {
    if (ARMFI->getArgumentStackSize() < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
          .add(condCodeOp())
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg1)
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(
            *AllocMBB, MBBI, DL, ScratchReg1, 0,
      }
    }
  } else {
    if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
          .add(condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(
          *AllocMBB, MBBI, DL, ScratchReg1, 0,
    }
  }

  // push {lr} - Save return address of this function.
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
        .addReg(ARM::LR);
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .addReg(ARM::LR);
  }

  // Emit the DWARF info about the change in stack as well as where to find the
  // previous link register
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
        nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Call __morestack().
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
        .addExternalSymbol("__morestack");
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::BL))
        .addExternalSymbol("__morestack");
  }

  // pop {lr} - Restore return address of this original function.
  if (Thumb) {
    if (ST->isThumb1Only()) {
      // Thumb1 tPOP cannot write LR: pop into SR0, then move SR0 to LR.
      BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
          .addReg(ScratchReg0);
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
          .addReg(ScratchReg0)
    } else {
      BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
          .addReg(ARM::LR, RegState::Define)
          .addReg(ARM::SP, RegState::Define)
          .addReg(ARM::SP)
          .addImm(4)
    }
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .addReg(ARM::LR);
  }

  // Restore SR0 and SR1 in case of __morestack() was called.
  // __morestack() will skip PostStackMBB block so we need to restore
  // scratch registers from here.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Return from this function.
  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));

  // Restore SR0 and SR1 in case of __morestack() was not called.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    // Tell debuggers that r4 and r5 are now the same as they were in the
    // previous function, that they're the "Same Value".
        nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
        nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Organizing MBB lists
  PostStackMBB->addSuccessor(&PrologueMBB);

  AllocMBB->addSuccessor(PostStackMBB);

  GetMBB->addSuccessor(PostStackMBB);
  GetMBB->addSuccessor(AllocMBB);

  McrMBB->addSuccessor(GetMBB);

  PrevStackMBB->addSuccessor(McrMBB);

#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}
unsigned const MachineRegisterInfo * MRI
static int64_t getArgumentStackToRestore(MachineFunction &MF, MachineBasicBlock &MBB)
Returns how much of the incoming argument stack area (in bytes) we should clean up in an epilogue.
static bool needsWinCFI(const MachineFunction &MF)
static unsigned estimateRSStackSizeLimit(MachineFunction &MF)
Look at each instruction that references stack frames and return the stack size limit beyond which so...
unsigned RegSize
static bool isThumb(const MCSubtargetInfo &STI)
static MachineBasicBlock::iterator skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs)
Skip past the code inserted by emitAlignedDPRCS2Spills, and return an iterator to the following instr...
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, const TargetInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const unsigned Reg, const Align Alignment, const bool MustBeSingleInstruction)
Emit an instruction sequence that will align the address in register Reg by zero-ing out the lower bi...
static uint32_t alignToARMConstant(uint32_t Value)
Get the minimum constant for ARM that is greater than or equal to the argument.
static void checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs)
static void insertSEHRange(MachineBasicBlock &MBB, MachineBasicBlock::iterator Start, const MachineBasicBlock::iterator &End, const ARMBaseInstrInfo &TII, unsigned MIFlags)
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF, const ARMBaseInstrInfo &TII)
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI, const TargetInstrInfo &TII, unsigned Flags)
SpillArea getSpillArea(Register Reg, ARMSubtarget::PushPopSplitVariation Variation, unsigned NumAlignedDPRCS2Regs, const ARMBaseRegisterInfo *RegInfo)
Get the spill area that Reg should be saved into in the prologue.
static bool canSpillOnFrameIndexAccess(const MachineFunction &MF, const TargetFrameLowering &TFI)
static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes)
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI, const MachineFunction &MF)
We need the offset of the frame pointer relative to other MachineFrameInfo offsets which are encoded ...
static MachineBasicBlock::iterator initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI)
static int sizeOfSPAdjustment(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
static cl::opt< bool > SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), cl::desc("Align ARM NEON spills in prolog and epilog"))
static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg, unsigned SrcReg, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
Value * RHS
Value * LHS
static const unsigned FramePtr
bool hasBasePointer(const MachineFunction &MF) const
virtual void emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred=ARMCC::AL, Register PredReg=Register(), unsigned MIFlags=MachineInstr::NoFlags) const
emitLoadConstPool - Emits a load from constpool to materialize the specified immediate.
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
bool cannotEliminateFrame(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
bool canRealignStack(const MachineFunction &MF) const override
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - Provide a base+offset reference to an FI slot for debug info.
bool keepFramePointer(const MachineFunction &MF) const
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
ARMFrameLowering(const ARMSubtarget &sti)
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool requiresAAPCSFrameRecord(const MachineFunction &MF) const
bool isFPReserved(const MachineFunction &MF) const
isFPReserved - Return true if the frame pointer register should be considered a reserved register on ...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override
Adjust the prologue to have the function use segmented stacks.
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override
getCalleeSavedSpillSlots - This method returns a pointer to an array of pairs, that contains an entry...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
bool hasFPImpl(const MachineFunction &MF) const override
hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.
int ResolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, int SPAdj) const
void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const override
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
const ARMSubtarget & STI
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
bool enableCalleeSaveSkip(const MachineFunction &MF) const override
Returns true if the target can safely skip saving callee-saved registers for noreturn nounwind functi...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
unsigned getFPCXTSaveAreaSize() const
unsigned getGPRCalleeSavedArea1Size() const
unsigned getDPRCalleeSavedGapSize() const
unsigned getDPRCalleeSavedArea1Size() const
void setDPRCalleeSavedArea1Offset(unsigned o)
void setGPRCalleeSavedArea2Size(unsigned s)
void setFramePtrSpillOffset(unsigned o)
unsigned getGPRCalleeSavedArea2Size() const
unsigned getNumAlignedDPRCS2Regs() const
void setGPRCalleeSavedArea1Size(unsigned s)
unsigned getArgumentStackToRestore() const
void setFPCXTSaveAreaSize(unsigned s)
unsigned getGPRCalleeSavedArea3Size() const
unsigned getFramePtrSpillOffset() const
unsigned getArgRegsSaveSize() const
void setGPRCalleeSavedArea2Offset(unsigned o)
void setGPRCalleeSavedArea1Offset(unsigned o)
void setDPRCalleeSavedArea1Size(unsigned s)
void setDPRCalleeSavedGapSize(unsigned s)
unsigned getArgumentStackSize() const
unsigned getReturnRegsCount() const
void setGPRCalleeSavedArea3Size(unsigned s)
bool useMovt() const
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:238
bool isTargetWindows() const
Definition: ARMSubtarget.h:345
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:250
enum PushPopSplitVariation getPushPopSplitVariation(const MachineFunction &MF) const
PushPopSplitVariation
How the push and pop instructions of callee saved general-purpose registers should be split.
Definition: ARMSubtarget.h:86
@ SplitR11WindowsSEH
When the stack frame size is not known (because of variable-sized objects or realignment),...
Definition: ARMSubtarget.h:111
@ SplitR7
R7 and LR must be adjacent, because R7 is the frame pointer, and must point to a frame record consist...
Definition: ARMSubtarget.h:102
@ SplitR11AAPCSSignRA
When generating AAPCS-compilant frame chains, R11 is the frame pointer, and must be pushed adjacent t...
Definition: ARMSubtarget.h:123
@ NoSplit
All GPRs can be pushed in a single instruction.
Definition: ARMSubtarget.h:90
bool isTargetELF() const
Definition: ARMSubtarget.h:348
const ARMFrameLowering * getFrameLowering() const override
Definition: ARMSubtarget.h:246
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:234
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:661
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:582
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:575
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:617
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:590
static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_same_value Current value of Register is the same as in the previous frame.
Definition: MCDwarf.h:670
Context object for machine code objects.
Definition: MCContext.h:83
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:414
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
iterator_range< livein_iterator > liveins() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Given a machine basic block that branched to 'Old', change the code and CFG so that it branches to 'N...
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool isReturnAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
int getStackProtectorIndex() const
Return the index for the stack protector object.
int64_t getOffsetAdjustment() const
Return the correction for frame offsets.
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
BitVector getPristineRegs(const MachineFunction &MF) const
Return a set of physical registers that are pristine.
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:401
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
typename SuperClass::iterator iterator
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
virtual void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
int alignSPAdjust(int SPAdj) const
alignSPAdjust - This method aligns the stack adjustment to the correct alignment.
virtual StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const
getFrameIndexReference - This method should return the base register and offset used to reference a f...
TargetInstrInfo - Interface to description of machine instruction set.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool FramePointerIsReserved(const MachineFunction &MF) const
FramePointerIsReserved - This returns true if the frame pointer must always either point to a new fra...
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool hasStackRealignment(const MachineFunction &MF) const
True if stack realignment is required and still possible.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
LLVM Value Representation.
Definition: Value.h:74
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
@ D16
Only 16 D registers.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materializea des...
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85