LLVM 19.0.0git
ARMFrameLowering.cpp
Go to the documentation of this file.
1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function so that the particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
25// | | Higher address
26// |-----------------------------------|
27// | |
28// | arguments passed on the stack |
29// | |
30// |-----------------------------------| <- sp
31// | | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) doesn't get created until the
36// main function body, after the prologue is run. However, it's depicted here
37// for completeness.
38//
39// | | Higher address
40// |-----------------------------------|
41// | |
42// | arguments passed on the stack |
43// | |
44// |-----------------------------------| <- (sp at function entry)
45// | |
46// | varargs from registers |
47// | |
48// |-----------------------------------|
49// | |
50// | prev_lr |
51// | prev_fp |
52// | (a.k.a. "frame record") |
53// | |
54// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55// | |
56// | callee-saved gpr registers |
57// | |
58// |-----------------------------------|
59// | |
60// | callee-saved fp/simd regs |
61// | |
62// |-----------------------------------|
63// |.empty.space.to.make.part.below....|
64// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65// |.the.standard.8-byte.alignment.....| compile time; if present)
66// |-----------------------------------|
67// | |
68// | local variables of fixed size |
69// | including spill slots |
70// |-----------------------------------| <- base pointer (not defined by ABI,
71// |.variable-sized.local.variables....| LLVM chooses r6)
72// |.(VLAs)............................| (size of this area is unknown at
73// |...................................| compile time)
74// |-----------------------------------| <- sp
75// | | Lower address
76//
77//
78// To access the data in a frame, a constant offset to it must be computable at
79// compile time from one of the pointers (fp, bp, sp). The size
80// of the areas with a dotted background cannot be computed at compile-time
81// if they are present, making it required to have all three of fp, bp and
82// sp to be set up to be able to access all contents in the frame areas,
83// assuming all of the frame areas are non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
115#include "ARMSubtarget.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCContext.h"
144#include "llvm/MC/MCDwarf.h"
145#include "llvm/MC/MCInstrDesc.h"
147#include "llvm/Support/CodeGen.h"
150#include "llvm/Support/Debug.h"
156#include <algorithm>
157#include <cassert>
158#include <cstddef>
159#include <cstdint>
160#include <iterator>
161#include <utility>
162#include <vector>
163
164#define DEBUG_TYPE "arm-frame-lowering"
165
166using namespace llvm;
167
// Hidden command-line flag "align-neon-spills" (defaults to true); its help
// text describes it as aligning ARM NEON spills in the prolog and epilog.
168static cl::opt<bool>
169SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
170 cl::desc("Align ARM NEON spills in prolog and epilog"));
171
174 unsigned NumAlignedDPRCS2Regs);
175
177 : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
178 STI(sti) {}
179
181 // iOS always has a FP for backtracking, force other targets to keep their FP
182 // when doing FastISel. The emitted code is currently superior, and in cases
183 // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
184 return MF.getSubtarget<ARMSubtarget>().useFastISel();
185}
186
187/// Returns true if the target can safely skip saving callee-saved registers
188/// for noreturn nounwind functions.
190 assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
191 MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
192 !MF.getFunction().hasFnAttribute(Attribute::UWTable));
193
194 // Frame pointer and link register are not treated as normal CSR, thus we
195 // can always skip CSR saves for nonreturning functions.
196 return true;
197}
198
199/// hasFP - Return true if the specified function should have a dedicated frame
200/// pointer register. This is true if frame pointer elimination is disabled, or
201/// if the function has VLAs, realigns its stack or has its frame address taken.
203 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
204 const MachineFrameInfo &MFI = MF.getFrameInfo();
205
206 // ABI-required frame pointer.
208 return true;
209
210 // Frame pointer required for use within this function.
211 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
212 MFI.isFrameAddressTaken());
213}
214
215/// isFPReserved - Return true if the frame pointer register should be
216/// considered a reserved register on the scope of the specified function.
218 return hasFP(MF) || MF.getSubtarget<ARMSubtarget>().createAAPCSFrameChain();
219}
220
221/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
222/// not required, we reserve argument space for call sites in the function
223/// immediately on entry to the current function. This eliminates the need for
224/// add/sub sp brackets around call sites. Returns true if the call frame is
225/// included as part of the stack frame.
227 const MachineFrameInfo &MFI = MF.getFrameInfo();
228 unsigned CFSize = MFI.getMaxCallFrameSize();
229 // It's not always a good idea to include the call frame as part of the
230 // stack frame. ARM (especially Thumb) has small immediate offset to
231 // address the stack frame. So a large call frame can cause poor codegen
232 // and may even make it impossible to scavenge a register.
233 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
234 return false;
235
236 return !MFI.hasVarSizedObjects();
237}
238
239/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
240/// call frame pseudos can be simplified. Unlike most targets, having a FP
241/// is not sufficient here since we still may reference some objects via SP
242/// even when FP is available in Thumb2 mode.
243bool
246}
247
248// Returns how much of the incoming argument stack area we should clean up in an
249// epilogue. For the C calling convention this will be 0, for guaranteed tail
250// call conventions it can be positive (a normal return or a tail call to a
251// function that uses less stack space for arguments) or negative (for a tail
252// call to a function that needs more stack space than us for arguments).
256 bool IsTailCallReturn = false;
257 if (MBB.end() != MBBI) {
258 unsigned RetOpcode = MBBI->getOpcode();
259 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
260 RetOpcode == ARM::TCRETURNri;
261 }
263
264 int ArgumentPopSize = 0;
265 if (IsTailCallReturn) {
266 MachineOperand &StackAdjust = MBBI->getOperand(1);
267
268 // For a tail-call in a callee-pops-arguments environment, some or all of
269 // the stack may actually be in use for the call's arguments, this is
270 // calculated during LowerCall and consumed here...
271 ArgumentPopSize = StackAdjust.getImm();
272 } else {
273 // ... otherwise the amount to pop is *all* of the argument space,
274 // conveniently stored in the MachineFunctionInfo by
275 // LowerFormalArguments. This will, of course, be zero for the C calling
276 // convention.
277 ArgumentPopSize = AFI->getArgumentStackToRestore();
278 }
279
280 return ArgumentPopSize;
281}
282
283static bool needsWinCFI(const MachineFunction &MF) {
284 const Function &F = MF.getFunction();
285 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
286 F.needsUnwindTableEntry();
287}
288
289// Given a load or a store instruction, generate an appropriate unwinding SEH
290// code on Windows.
292 const TargetInstrInfo &TII,
293 unsigned Flags) {
294 unsigned Opc = MBBI->getOpcode();
296 MachineFunction &MF = *MBB->getParent();
297 DebugLoc DL = MBBI->getDebugLoc();
299 const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
300 const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
301
302 Flags |= MachineInstr::NoMerge;
303
304 switch (Opc) {
305 default:
306 report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
307 break;
308 case ARM::t2ADDri: // add.w r11, sp, #xx
309 case ARM::t2ADDri12: // add.w r11, sp, #xx
310 case ARM::t2MOVTi16: // movt r4, #xx
311 case ARM::tBL: // bl __chkstk
312 // These are harmless if used for just setting up a frame pointer,
313 // but that frame pointer can't be relied upon for unwinding, unless
314 // set up with SEH_SaveSP.
315 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
316 .addImm(/*Wide=*/1)
317 .setMIFlags(Flags);
318 break;
319
320 case ARM::t2MOVi16: { // mov(w) r4, #xx
321 bool Wide = MBBI->getOperand(1).getImm() >= 256;
322 if (!Wide) {
323 MachineInstrBuilder NewInstr =
324 BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
325 NewInstr.add(MBBI->getOperand(0));
326 NewInstr.add(t1CondCodeOp(/*isDead=*/true));
327 for (MachineOperand &MO : llvm::drop_begin(MBBI->operands()))
328 NewInstr.add(MO);
329 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
330 MBB->erase(MBBI);
331 MBBI = NewMBBI;
332 }
333 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
334 break;
335 }
336
337 case ARM::tBLXr: // blx r12 (__chkstk)
338 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
339 .addImm(/*Wide=*/0)
340 .setMIFlags(Flags);
341 break;
342
343 case ARM::t2MOVi32imm: // movw+movt
344 // This pseudo instruction expands into two mov instructions. If the
345 // second operand is a symbol reference, this will stay as two wide
346 // instructions, movw+movt. If they're immediates, the first one can
347 // end up as a narrow mov though.
348 // As two SEH instructions are appended here, they won't get interleaved
349 // between the two final movw/movt instructions, but it doesn't make any
350 // practical difference.
351 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
352 .addImm(/*Wide=*/1)
353 .setMIFlags(Flags);
354 MBB->insertAfter(MBBI, MIB);
355 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
356 .addImm(/*Wide=*/1)
357 .setMIFlags(Flags);
358 break;
359
360 case ARM::t2STR_PRE:
361 if (MBBI->getOperand(0).getReg() == ARM::SP &&
362 MBBI->getOperand(2).getReg() == ARM::SP &&
363 MBBI->getOperand(3).getImm() == -4) {
364 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
365 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
366 .addImm(1ULL << Reg)
367 .addImm(/*Wide=*/1)
368 .setMIFlags(Flags);
369 } else {
370 report_fatal_error("No matching SEH Opcode for t2STR_PRE");
371 }
372 break;
373
374 case ARM::t2LDR_POST:
375 if (MBBI->getOperand(1).getReg() == ARM::SP &&
376 MBBI->getOperand(2).getReg() == ARM::SP &&
377 MBBI->getOperand(3).getImm() == 4) {
378 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
379 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
380 .addImm(1ULL << Reg)
381 .addImm(/*Wide=*/1)
382 .setMIFlags(Flags);
383 } else {
384 report_fatal_error("No matching SEH Opcode for t2LDR_POST");
385 }
386 break;
387
388 case ARM::t2LDMIA_RET:
389 case ARM::t2LDMIA_UPD:
390 case ARM::t2STMDB_UPD: {
391 unsigned Mask = 0;
392 bool Wide = false;
393 for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
394 const MachineOperand &MO = MBBI->getOperand(i);
395 if (!MO.isReg() || MO.isImplicit())
396 continue;
397 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
398 if (Reg == 15)
399 Reg = 14;
400 if (Reg >= 8 && Reg <= 13)
401 Wide = true;
402 else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
403 Wide = true;
404 Mask |= 1 << Reg;
405 }
406 if (!Wide) {
407 unsigned NewOpc;
408 switch (Opc) {
409 case ARM::t2LDMIA_RET:
410 NewOpc = ARM::tPOP_RET;
411 break;
412 case ARM::t2LDMIA_UPD:
413 NewOpc = ARM::tPOP;
414 break;
415 case ARM::t2STMDB_UPD:
416 NewOpc = ARM::tPUSH;
417 break;
418 default:
420 }
421 MachineInstrBuilder NewInstr =
422 BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
423 for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
424 NewInstr.add(MBBI->getOperand(i));
425 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
426 MBB->erase(MBBI);
427 MBBI = NewMBBI;
428 }
429 unsigned SEHOpc =
430 (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
431 MIB = BuildMI(MF, DL, TII.get(SEHOpc))
432 .addImm(Mask)
433 .addImm(Wide ? 1 : 0)
434 .setMIFlags(Flags);
435 break;
436 }
437 case ARM::VSTMDDB_UPD:
438 case ARM::VLDMDIA_UPD: {
439 int First = -1, Last = 0;
440 for (const MachineOperand &MO : llvm::drop_begin(MBBI->operands(), 4)) {
441 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
442 if (First == -1)
443 First = Reg;
444 Last = Reg;
445 }
446 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
447 .addImm(First)
448 .addImm(Last)
449 .setMIFlags(Flags);
450 break;
451 }
452 case ARM::tSUBspi:
453 case ARM::tADDspi:
454 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
455 .addImm(MBBI->getOperand(2).getImm() * 4)
456 .addImm(/*Wide=*/0)
457 .setMIFlags(Flags);
458 break;
459 case ARM::t2SUBspImm:
460 case ARM::t2SUBspImm12:
461 case ARM::t2ADDspImm:
462 case ARM::t2ADDspImm12:
463 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
464 .addImm(MBBI->getOperand(2).getImm())
465 .addImm(/*Wide=*/1)
466 .setMIFlags(Flags);
467 break;
468
469 case ARM::tMOVr:
470 if (MBBI->getOperand(1).getReg() == ARM::SP &&
471 (Flags & MachineInstr::FrameSetup)) {
472 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
473 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
474 .addImm(Reg)
475 .setMIFlags(Flags);
476 } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
477 (Flags & MachineInstr::FrameDestroy)) {
478 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
479 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
480 .addImm(Reg)
481 .setMIFlags(Flags);
482 } else {
483 report_fatal_error("No SEH Opcode for MOV");
484 }
485 break;
486
487 case ARM::tBX_RET:
488 case ARM::TCRETURNri:
489 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
490 .addImm(/*Wide=*/0)
491 .setMIFlags(Flags);
492 break;
493
494 case ARM::TCRETURNdi:
495 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
496 .addImm(/*Wide=*/1)
497 .setMIFlags(Flags);
498 break;
499 }
500 return MBB->insertAfter(MBBI, MIB);
501}
502
505 if (MBBI == MBB.begin())
507 return std::prev(MBBI);
508}
509
513 const ARMBaseInstrInfo &TII, unsigned MIFlags) {
514 if (Start.isValid())
515 Start = std::next(Start);
516 else
517 Start = MBB.begin();
518
519 for (auto MI = Start; MI != End;) {
520 auto Next = std::next(MI);
521 // Check if this instruction already has got a SEH opcode added. In that
522 // case, don't do this generic mapping.
523 if (Next != End && isSEHInstruction(*Next)) {
524 MI = std::next(Next);
525 while (MI != End && isSEHInstruction(*MI))
526 ++MI;
527 continue;
528 }
529 insertSEH(MI, TII, MIFlags);
530 MI = Next;
531 }
532}
533
536 const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
537 unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
538 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
539 if (isARM)
540 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
541 Pred, PredReg, TII, MIFlags);
542 else
543 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
544 Pred, PredReg, TII, MIFlags);
545}
546
/// Adjust the stack pointer by NumBytes. This is a thin wrapper that forwards
/// to emitRegPlusImmediate with ARM::SP as both the destination and the source
/// register; isARM, MIFlags, Pred and PredReg are passed through unchanged.
/// Callers in this file pass a negative NumBytes to allocate stack space.
// NOTE(review): the MBBI, dl and Pred parameters used in the body are not
// declared in the visible lines of this listing -- confirm against the full
// file.
547static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
549 const ARMBaseInstrInfo &TII, int NumBytes,
550 unsigned MIFlags = MachineInstr::NoFlags,
552 unsigned PredReg = 0) {
553 emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
554 MIFlags, Pred, PredReg);
555}
556
558 int RegSize;
559 switch (MI.getOpcode()) {
560 case ARM::VSTMDDB_UPD:
561 RegSize = 8;
562 break;
563 case ARM::STMDB_UPD:
564 case ARM::t2STMDB_UPD:
565 RegSize = 4;
566 break;
567 case ARM::t2STR_PRE:
568 case ARM::STR_PRE_IMM:
569 return 4;
570 default:
571 llvm_unreachable("Unknown push or pop like instruction");
572 }
573
574 int count = 0;
575 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
576 // pred) so the list starts at 4.
577 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
578 count += RegSize;
579 return count;
580}
581
583 size_t StackSizeInBytes) {
584 const MachineFrameInfo &MFI = MF.getFrameInfo();
585 const Function &F = MF.getFunction();
586 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
587
588 StackProbeSize =
589 F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
590 return (StackSizeInBytes >= StackProbeSize) &&
591 !F.hasFnAttribute("no-stack-arg-probe");
592}
593
594namespace {
595
/// Collects the instructions that adjust SP, in the order they are recorded,
/// together with the number of bytes each one accounts for, so that CFA-offset
/// CFI instructions can be emitted for them afterwards.
// NOTE(review): the `I` member of InstInfo, the `Insts` container and the `MF`
// variable used below are not declared in the visible lines of this listing --
// confirm against the full file.
596struct StackAdjustingInsts {
// One recorded SP-adjusting instruction.
597 struct InstInfo {
// Number of bytes this instruction contributes to the CFA offset.
599 unsigned SPAdjust;
// When false and the function has a frame pointer, emitDefCFAOffsets stops
// at this entry.
600 bool BeforeFPSet;
601 };
602
604
// Record instruction I as adjusting SP by SPAdjust bytes.
605 void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
606 bool BeforeFPSet = false) {
607 InstInfo Info = {I, SPAdjust, BeforeFPSet};
608 Insts.push_back(Info);
609 }
610
// Add ExtraBytes to the adjustment of an already recorded instruction I.
// Asserts that I was recorded before.
611 void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
// Linear search for the entry whose instruction iterator equals I. Note that
// the lambda parameter shadows the outer `Info` iterator.
612 auto Info =
613 llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
614 assert(Info != Insts.end() && "invalid sp adjusting instruction");
615 Info->SPAdjust += ExtraBytes;
616 }
617
// Walk the recorded instructions in order and, right after each one, insert a
// CFI_INSTRUCTION that sets the CFA offset to the running total of all
// adjustments seen so far.
618 void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
619 const ARMBaseInstrInfo &TII, bool HasFP) {
621 unsigned CFAOffset = 0;
622 for (auto &Info : Insts) {
// With a frame pointer, stop at the first entry not marked BeforeFPSet; no
// further CFA-offset updates are emitted from here on.
623 if (HasFP && !Info.BeforeFPSet)
624 return;
625
626 CFAOffset += Info.SPAdjust;
627 unsigned CFIIndex = MF.addFrameInst(
628 MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
// Insert immediately after the SP-adjusting instruction.
629 BuildMI(MBB, std::next(Info.I), dl,
630 TII.get(TargetOpcode::CFI_INSTRUCTION))
631 .addCFIIndex(CFIIndex)
633 }
634 }
635};
636
637} // end anonymous namespace
638
639/// Emit an instruction sequence that will align the address in
640/// register Reg by zero-ing out the lower bits. For versions of the
641/// architecture that support Neon, this must be done in a single
642/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
643/// single instruction. That function only gets called when optimizing
644/// spilling of D registers on a core with the Neon instruction set
645/// present.
647 const TargetInstrInfo &TII,
650 const DebugLoc &DL, const unsigned Reg,
651 const Align Alignment,
652 const bool MustBeSingleInstruction) {
653 const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
654 const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
655 const unsigned AlignMask = Alignment.value() - 1U;
656 const unsigned NrBitsToZero = Log2(Alignment);
657 assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
658 if (!AFI->isThumbFunction()) {
659 // if the BFC instruction is available, use that to zero the lower
660 // bits:
661 // bfc Reg, #0, log2(Alignment)
662 // otherwise use BIC, if the mask to zero the required number of bits
663 // can be encoded in the bic immediate field
664 // bic Reg, Reg, Alignment-1
665 // otherwise, emit
666 // lsr Reg, Reg, log2(Alignment)
667 // lsl Reg, Reg, log2(Alignment)
668 if (CanUseBFC) {
669 BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
671 .addImm(~AlignMask)
673 } else if (AlignMask <= 255) {
674 BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
676 .addImm(AlignMask)
678 .add(condCodeOp());
679 } else {
680 assert(!MustBeSingleInstruction &&
681 "Shouldn't call emitAligningInstructions demanding a single "
682 "instruction to be emitted for large stack alignment for a target "
683 "without BFC.");
684 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
686 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
688 .add(condCodeOp());
689 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
691 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
693 .add(condCodeOp());
694 }
695 } else {
696 // Since this is only reached for Thumb-2 targets, the BFC instruction
697 // should always be available.
698 assert(CanUseBFC);
699 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
701 .addImm(~AlignMask)
703 }
704}
705
706/// We need the offset of the frame pointer relative to other MachineFrameInfo
707/// offsets which are encoded relative to SP at function begin.
708/// See also emitPrologue() for how the FP is set up.
709/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
710/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
711/// this to produce a conservative estimate that we check in an assert() later.
712static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
713 const MachineFunction &MF) {
714 // For Thumb1, push.w isn't available, so the first push will always push
715 // r7 and lr onto the stack first.
716 if (AFI.isThumb1OnlyFunction())
717 return -AFI.getArgRegsSaveSize() - (2 * 4);
718 // This is a conservative estimation: Assume the frame pointer being r7 and
719 // pc("r15") up to r8 getting spilled before (= 8 registers).
720 int MaxRegBytes = 8 * 4;
721 if (STI.splitFramePointerPush(MF)) {
722 // Here, r11 can be stored below all of r4-r15 (3 registers more than
723 // above), plus d8-d15.
724 MaxRegBytes = 11 * 4 + 8 * 8;
725 }
726 int FPCXTSaveSize =
727 (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
728 return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
729}
730
732 MachineBasicBlock &MBB) const {
734 MachineFrameInfo &MFI = MF.getFrameInfo();
736 MachineModuleInfo &MMI = MF.getMMI();
737 MCContext &Context = MMI.getContext();
738 const TargetMachine &TM = MF.getTarget();
739 const MCRegisterInfo *MRI = Context.getRegisterInfo();
740 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
743 "This emitPrologue does not support Thumb1!");
744 bool isARM = !AFI->isThumbFunction();
745 Align Alignment = STI.getFrameLowering()->getStackAlign();
746 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
747 unsigned NumBytes = MFI.getStackSize();
748 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
749 int FPCXTSaveSize = 0;
750 bool NeedsWinCFI = needsWinCFI(MF);
751
752 // Debug location must be unknown since the first debug location is used
753 // to determine the end of the prologue.
754 DebugLoc dl;
755
756 Register FramePtr = RegInfo->getFrameRegister(MF);
757
758 // Determine the sizes of each callee-save spill areas and record which frame
759 // belongs to which callee-save spill areas.
760 unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
761 int FramePtrSpillFI = 0;
762 int D8SpillFI = 0;
763
764 // All calls are tail calls in GHC calling conv, and functions have no
765 // prologue/epilogue.
767 return;
768
769 StackAdjustingInsts DefCFAOffsetCandidates;
770 bool HasFP = hasFP(MF);
771
772 if (!AFI->hasStackFrame() &&
773 (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
774 if (NumBytes != 0) {
775 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
777 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
778 }
779 if (!NeedsWinCFI)
780 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
781 if (NeedsWinCFI && MBBI != MBB.begin()) {
783 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
785 MF.setHasWinCFI(true);
786 }
787 return;
788 }
789
790 // Determine spill area sizes.
791 if (STI.splitFramePointerPush(MF)) {
792 for (const CalleeSavedInfo &I : CSI) {
793 Register Reg = I.getReg();
794 int FI = I.getFrameIdx();
795 switch (Reg) {
796 case ARM::R11:
797 case ARM::LR:
798 if (Reg == FramePtr)
799 FramePtrSpillFI = FI;
800 GPRCS2Size += 4;
801 break;
802 case ARM::R0:
803 case ARM::R1:
804 case ARM::R2:
805 case ARM::R3:
806 case ARM::R4:
807 case ARM::R5:
808 case ARM::R6:
809 case ARM::R7:
810 case ARM::R8:
811 case ARM::R9:
812 case ARM::R10:
813 case ARM::R12:
814 GPRCS1Size += 4;
815 break;
816 case ARM::FPCXTNS:
817 FPCXTSaveSize = 4;
818 break;
819 default:
820 // This is a DPR. Exclude the aligned DPRCS2 spills.
821 if (Reg == ARM::D8)
822 D8SpillFI = FI;
823 if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
824 DPRCSSize += 8;
825 }
826 }
827 } else {
828 for (const CalleeSavedInfo &I : CSI) {
829 Register Reg = I.getReg();
830 int FI = I.getFrameIdx();
831 switch (Reg) {
832 case ARM::R8:
833 case ARM::R9:
834 case ARM::R10:
835 case ARM::R11:
836 case ARM::R12:
837 if (STI.splitFramePushPop(MF)) {
838 GPRCS2Size += 4;
839 break;
840 }
841 [[fallthrough]];
842 case ARM::R0:
843 case ARM::R1:
844 case ARM::R2:
845 case ARM::R3:
846 case ARM::R4:
847 case ARM::R5:
848 case ARM::R6:
849 case ARM::R7:
850 case ARM::LR:
851 if (Reg == FramePtr)
852 FramePtrSpillFI = FI;
853 GPRCS1Size += 4;
854 break;
855 case ARM::FPCXTNS:
856 FPCXTSaveSize = 4;
857 break;
858 default:
859 // This is a DPR. Exclude the aligned DPRCS2 spills.
860 if (Reg == ARM::D8)
861 D8SpillFI = FI;
862 if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
863 DPRCSSize += 8;
864 }
865 }
866 }
867
868 MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
869
870 // Move past the PAC computation.
871 if (AFI->shouldSignReturnAddress())
872 LastPush = MBBI++;
873
874 // Move past FPCXT area.
875 if (FPCXTSaveSize > 0) {
876 LastPush = MBBI++;
877 DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
878 }
879
880 // Allocate the vararg register save area.
881 if (ArgRegsSaveSize) {
882 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
884 LastPush = std::prev(MBBI);
885 DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
886 }
887
888 // Move past area 1.
889 if (GPRCS1Size > 0) {
890 GPRCS1Push = LastPush = MBBI++;
891 DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
892 }
893
894 // Determine starting offsets of spill areas.
895 unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
896 unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
897 unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
898 Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
899 unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
900 if (!STI.splitFramePointerPush(MF)) {
901 DPRGapSize += GPRCS2Size;
902 }
903 DPRGapSize %= DPRAlign.value();
904
905 unsigned DPRCSOffset;
906 if (STI.splitFramePointerPush(MF)) {
907 DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
908 GPRCS2Offset = DPRCSOffset - GPRCS2Size;
909 } else {
910 DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
911 }
912 int FramePtrOffsetInPush = 0;
913 if (HasFP) {
914 int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
915 assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
916 "Max FP estimation is wrong");
917 FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
918 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
919 NumBytes);
920 }
921 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
922 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
923 AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
924
925 // Move past area 2.
926 if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
927 GPRCS2Push = LastPush = MBBI++;
928 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
929 }
930
931 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
932 // .cfi_offset operations will reflect that.
933 if (DPRGapSize) {
934 assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
935 if (LastPush != MBB.end() &&
936 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
937 DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
938 else {
939 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
941 DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
942 }
943 }
944
945 // Move past area 3.
946 if (DPRCSSize > 0) {
947 // Since vpush register list cannot have gaps, there may be multiple vpush
948 // instructions in the prologue.
949 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
950 DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
951 LastPush = MBBI++;
952 }
953 }
954
955 // Move past the aligned DPRCS2 area.
956 if (AFI->getNumAlignedDPRCS2Regs() > 0) {
958 // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
959 // leaves the stack pointer pointing to the DPRCS2 area.
960 //
961 // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
962 NumBytes += MFI.getObjectOffset(D8SpillFI);
963 } else
964 NumBytes = DPRCSOffset;
965
966 if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
967 GPRCS2Push = LastPush = MBBI++;
968 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
969 }
970
971 bool NeedsWinCFIStackAlloc = NeedsWinCFI;
972 if (STI.splitFramePointerPush(MF) && HasFP)
973 NeedsWinCFIStackAlloc = false;
974
975 if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
976 uint32_t NumWords = NumBytes >> 2;
977
978 if (NumWords < 65536) {
979 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
980 .addImm(NumWords)
983 } else {
984 // Split into two instructions here, instead of using t2MOVi32imm,
985 // to allow inserting accurate SEH instructions (including accurate
986 // instruction size for each of them).
987 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
988 .addImm(NumWords & 0xffff)
991 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
992 .addReg(ARM::R4)
993 .addImm(NumWords >> 16)
996 }
997
998 switch (TM.getCodeModel()) {
999 case CodeModel::Tiny:
1000 llvm_unreachable("Tiny code model not available on ARM.");
1001 case CodeModel::Small:
1002 case CodeModel::Medium:
1003 case CodeModel::Kernel:
1004 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
1006 .addExternalSymbol("__chkstk")
1007 .addReg(ARM::R4, RegState::Implicit)
1009 break;
1010 case CodeModel::Large:
1011 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
1012 .addExternalSymbol("__chkstk")
1014
1015 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
1017 .addReg(ARM::R12, RegState::Kill)
1018 .addReg(ARM::R4, RegState::Implicit)
1020 break;
1021 }
1022
1023 MachineInstrBuilder Instr, SEH;
1024 Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
1025 .addReg(ARM::SP, RegState::Kill)
1026 .addReg(ARM::R4, RegState::Kill)
1029 .add(condCodeOp());
1030 if (NeedsWinCFIStackAlloc) {
1031 SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
1032 .addImm(NumBytes)
1033 .addImm(/*Wide=*/1)
1035 MBB.insertAfter(Instr, SEH);
1036 }
1037 NumBytes = 0;
1038 }
1039
1040 if (NumBytes) {
1041 // Adjust SP after all the callee-save spills.
1042 if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
1043 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
1044 DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
1045 else {
1046 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
1048 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
1049 }
1050
1051 if (HasFP && isARM)
1052 // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
1053 // Note it's not safe to do this in Thumb2 mode because it would have
1054 // taken two instructions:
1055 // mov sp, r7
1056 // sub sp, #24
1057 // If an interrupt is taken between the two instructions, then sp is in
1058 // an inconsistent state (pointing to the middle of callee-saved area).
1059 // The interrupt handler can end up clobbering the registers.
1060 AFI->setShouldRestoreSPFromFP(true);
1061 }
1062
1063 // Set FP to point to the stack slot that contains the previous FP.
1064 // For iOS, FP is R7, which has now been stored in spill area 1.
1065 // Otherwise, if this is not iOS, all the callee-saved registers go
1066 // into spill area 1, including the FP in R11. In either case, it
1067 // is in area one and the adjustment needs to take place just after
1068 // that push.
1069 // FIXME: The above is not necessarily true when PACBTI is enabled.
1070 // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
1071 // so FP ends up on area two.
1073 if (HasFP) {
1074 AfterPush = std::next(GPRCS1Push);
1075 unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
1076 int FPOffset = PushSize + FramePtrOffsetInPush;
1077 if (STI.splitFramePointerPush(MF)) {
1078 AfterPush = std::next(GPRCS2Push);
1079 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1080 FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
1081 } else {
1082 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1083 FramePtr, ARM::SP, FPOffset,
1085 }
1086 if (!NeedsWinCFI) {
1087 if (FramePtrOffsetInPush + PushSize != 0) {
1088 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
1089 nullptr, MRI->getDwarfRegNum(FramePtr, true),
1090 FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
1091 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1092 .addCFIIndex(CFIIndex)
1094 } else {
1095 unsigned CFIIndex =
1097 nullptr, MRI->getDwarfRegNum(FramePtr, true)));
1098 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1099 .addCFIIndex(CFIIndex)
1101 }
1102 }
1103 }
1104
1105 // Emit a SEH opcode indicating the prologue end. The rest of the prologue
1106 // instructions below don't need to be replayed to unwind the stack.
1107 if (NeedsWinCFI && MBBI != MBB.begin()) {
1109 if (HasFP && STI.splitFramePointerPush(MF))
1110 End = AfterPush;
1112 BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
1114 MF.setHasWinCFI(true);
1115 }
1116
1117 // Now that the prologue's actual instructions are finalised, we can insert
1118 // the necessary DWARF cf instructions to describe the situation. Start by
1119 // recording where each register ended up:
1120 if (GPRCS1Size > 0 && !NeedsWinCFI) {
1121 MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
1122 int CFIIndex;
1123 for (const auto &Entry : CSI) {
1124 Register Reg = Entry.getReg();
1125 int FI = Entry.getFrameIdx();
1126 switch (Reg) {
1127 case ARM::R8:
1128 case ARM::R9:
1129 case ARM::R10:
1130 case ARM::R11:
1131 case ARM::R12:
1132 if (STI.splitFramePushPop(MF))
1133 break;
1134 [[fallthrough]];
1135 case ARM::R0:
1136 case ARM::R1:
1137 case ARM::R2:
1138 case ARM::R3:
1139 case ARM::R4:
1140 case ARM::R5:
1141 case ARM::R6:
1142 case ARM::R7:
1143 case ARM::LR:
1145 nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
1146 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1147 .addCFIIndex(CFIIndex)
1149 break;
1150 }
1151 }
1152 }
1153
1154 if (GPRCS2Size > 0 && !NeedsWinCFI) {
1155 MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
1156 for (const auto &Entry : CSI) {
1157 Register Reg = Entry.getReg();
1158 int FI = Entry.getFrameIdx();
1159 switch (Reg) {
1160 case ARM::R8:
1161 case ARM::R9:
1162 case ARM::R10:
1163 case ARM::R11:
1164 case ARM::R12:
1165 if (STI.splitFramePushPop(MF)) {
1166 unsigned DwarfReg = MRI->getDwarfRegNum(
1167 Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
1168 unsigned Offset = MFI.getObjectOffset(FI);
1169 unsigned CFIIndex = MF.addFrameInst(
1170 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1171 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1172 .addCFIIndex(CFIIndex)
1174 }
1175 break;
1176 }
1177 }
1178 }
1179
1180 if (DPRCSSize > 0 && !NeedsWinCFI) {
1181 // Since vpush register list cannot have gaps, there may be multiple vpush
1182 // instructions in the prologue.
1183 MachineBasicBlock::iterator Pos = std::next(LastPush);
1184 for (const auto &Entry : CSI) {
1185 Register Reg = Entry.getReg();
1186 int FI = Entry.getFrameIdx();
1187 if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
1188 (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
1189 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
1190 unsigned Offset = MFI.getObjectOffset(FI);
1191 unsigned CFIIndex = MF.addFrameInst(
1192 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1193 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1194 .addCFIIndex(CFIIndex)
1196 }
1197 }
1198 }
1199
1200 // Now we can emit descriptions of where the canonical frame address was
1201 // throughout the process. If we have a frame pointer, it takes over the job
1202 // half-way through, so only the first few .cfi_def_cfa_offset instructions
1203 // actually get emitted.
1204 if (!NeedsWinCFI)
1205 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
1206
1207 if (STI.isTargetELF() && hasFP(MF))
1209 AFI->getFramePtrSpillOffset());
1210
1211 AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
1212 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
1213 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
1214 AFI->setDPRCalleeSavedGapSize(DPRGapSize);
1215 AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
1216
1217 // If we need dynamic stack realignment, do it here. Be paranoid and make
1218 // sure if we also have VLAs, we have a base pointer for frame access.
1219 // If aligned NEON registers were spilled, the stack has already been
1220 // realigned.
1221 if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
1222 Align MaxAlign = MFI.getMaxAlign();
1224 if (!AFI->isThumbFunction()) {
1225 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
1226 false);
1227 } else {
1228 // We cannot use sp as source/dest register here, thus we're using r4 to
1229 // perform the calculations. We're emitting the following sequence:
1230 // mov r4, sp
1231 // -- use emitAligningInstructions to produce best sequence to zero
1232 // -- out lower bits in r4
1233 // mov sp, r4
1234 // FIXME: It will be better just to find spare register here.
1235 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
1236 .addReg(ARM::SP, RegState::Kill)
1238 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
1239 false);
1240 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1241 .addReg(ARM::R4, RegState::Kill)
1243 }
1244
1245 AFI->setShouldRestoreSPFromFP(true);
1246 }
1247
1248 // If we need a base pointer, set it up here. It's whatever the value
1249 // of the stack pointer is at this point. Any variable size objects
1250 // will be allocated after this, so we can still use the base pointer
1251 // to reference locals.
1252 // FIXME: Clarify FrameSetup flags here.
1253 if (RegInfo->hasBasePointer(MF)) {
1254 if (isARM)
1255 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
1256 .addReg(ARM::SP)
1258 .add(condCodeOp());
1259 else
1260 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
1261 .addReg(ARM::SP)
1263 }
1264
1265 // If the frame has variable sized objects then the epilogue must restore
1266 // the sp from fp. We can assume there's an FP here since hasFP already
1267 // checks for hasVarSizedObjects.
1268 if (MFI.hasVarSizedObjects())
1269 AFI->setShouldRestoreSPFromFP(true);
1270}
1271
1273 MachineBasicBlock &MBB) const {
// Epilogue emission for ARM and Thumb2 functions; Thumb1-only functions are
// rejected by the assert below.
// NOTE(review): the numbering embedded in this listing skips 1272 (the start
// of the signature) and several numbers inside the body, so statements that
// look unterminated here are missing their continuation lines.
1274 MachineFrameInfo &MFI = MF.getFrameInfo();
1276 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
1277 const ARMBaseInstrInfo &TII =
1278 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1279 assert(!AFI->isThumb1OnlyFunction() &&
1280 "This emitEpilogue does not support Thumb1!");
1281 bool isARM = !AFI->isThumbFunction();
1282
1283 // Amount of stack space we reserved next to incoming args for either
1284 // varargs registers or stack arguments in tail calls made by this function.
1285 unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
1286
1287 // How much of the stack used by incoming arguments this function is expected
1288 // to restore in this particular epilogue.
1289 int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
1290 int NumBytes = (int)MFI.getStackSize();
1291 Register FramePtr = RegInfo->getFrameRegister(MF);
1292
1293 // All calls are tail calls in GHC calling conv, and functions have no
1294 // prologue/epilogue.
// NOTE(review): the condition guarding this early return (listing line 1295)
// is not visible in this extract.
1296 return;
1297
1298 // First put ourselves on the first (from top) terminator instructions.
1300 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
1301
1302 MachineBasicBlock::iterator RangeStart;
// Without a stack frame there are no callee-save reloads to step over: only
// the local stack plus the incoming-argument bytes are released, and only
// when their sum is non-zero.
1303 if (!AFI->hasStackFrame()) {
1304 if (MF.hasWinCFI()) {
1305 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1307 RangeStart = initMBBRange(MBB, MBBI);
1308 }
1309
1310 if (NumBytes + IncomingArgStackToRestore != 0)
1311 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1312 NumBytes + IncomingArgStackToRestore,
1314 } else {
1315 // Unwind MBBI to point to first LDR / VLDRD.
1316 if (MBBI != MBB.begin()) {
1317 do {
1318 --MBBI;
1319 } while (MBBI != MBB.begin() &&
// The loop may step one instruction too far; move forward again if the
// instruction we stopped on is not flagged FrameDestroy.
1321 if (!MBBI->getFlag(MachineInstr::FrameDestroy))
1322 ++MBBI;
1323 }
1324
1325 if (MF.hasWinCFI()) {
1326 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1328 RangeStart = initMBBRange(MBB, MBBI);
1329 }
1330
1331 // Move SP to start of FP callee save spill area.
1332 NumBytes -= (ReservedArgStack +
1333 AFI->getFPCXTSaveAreaSize() +
1338
1339 // Reset SP based on frame pointer only if the stack frame extends beyond
1340 // frame pointer stack slot or target is ELF and the function has FP.
1341 if (AFI->shouldRestoreSPFromFP()) {
// From here on NumBytes is the distance subtracted from the frame pointer to
// recompute SP.
1342 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
1343 if (NumBytes) {
1344 if (isARM)
1345 emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
1346 ARMCC::AL, 0, TII,
1348 else {
1349 // It's not possible to restore SP from FP in a single instruction.
1350 // For iOS, this looks like:
1351 // mov sp, r7
1352 // sub sp, #24
1353 // This is bad, if an interrupt is taken after the mov, sp is in an
1354 // inconsistent state.
1355 // Use the first callee-saved register as a scratch register.
1356 assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
1357 "No scratch register to restore SP from FP!");
1358 emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
1360 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1361 .addReg(ARM::R4)
1364 }
1365 } else {
1366 // Thumb2 or ARM.
1367 if (isARM)
1368 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
1371 .add(condCodeOp())
1373 else
1374 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1378 }
// Not restoring from FP: try to fold the remaining SP adjustment into the
// instruction at MBBI, and emit an explicit SP update only if that fails.
1379 } else if (NumBytes &&
1380 !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
1381 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
1383
1384 // Increment past our save areas.
1386 MBBI++;
1387
1388 if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
1389 MBBI++;
1390 // Since vpop register list cannot have gaps, there may be multiple vpop
1391 // instructions in the epilogue.
1392 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
1393 MBBI++;
1394 }
// Release the alignment gap recorded for the D-register save area; the
// assert documents that only a 4-byte gap is expected.
1395 if (AFI->getDPRCalleeSavedGapSize()) {
1396 assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
1397 "unexpected DPR alignment gap");
1398 emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
1400 }
1401
1403 MBBI++;
1404 if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
1405
1406 if (ReservedArgStack || IncomingArgStackToRestore) {
1407 assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
1408 "attempting to restore negative stack amount");
1409 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1410 ReservedArgStack + IncomingArgStackToRestore,
1412 }
1413
1414 // Validate PAC; it should have already been popped into R12. For CMSE entry
1415 // functions, the validation instruction is emitted during expansion of the
1416 // tBXNS_RET, since the validation must use the value of SP at function
1417 // entry, before saving, resp. after restoring, FPCXTNS.
1418 if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
1419 BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
1420 }
1421
// When Windows CFI is in use, close the epilogue with an SEH_EpilogEnd marker
// placed at the end of the block.
1422 if (MF.hasWinCFI()) {
1424 BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
1426 }
1427}
1428
1429/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1430/// debug info. It's the same as what we use for resolving the code-gen
1431/// references for now. FIXME: This can go wrong when references are
1432/// SP-relative and simple call frames aren't used.
///
/// The base register is written to FrameReg. The offset comes from
/// ResolveFrameIndexReference called with an SP adjustment of 0 and is
/// returned wrapped as a fixed StackOffset.
/// NOTE(review): the first line of the signature (listing line 1433) is not
/// visible in this extract.
1434 int FI,
1435 Register &FrameReg) const {
1436 return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
1437}
1438
// Chooses the base register (SP, frame pointer or base pointer) used to
// address frame index FI, stores it in FrameReg and returns the offset from
// that register. SPAdj is added to SP-relative offsets and removed again when
// the base pointer is chosen.
// NOTE(review): the first line of the signature (listing line 1439) is missing
// from this extract; presumably this is the ResolveFrameIndexReference that
// getFrameIndexReference calls - confirm against the full file.
1440 int FI, Register &FrameReg,
1441 int SPAdj) const {
1442 const MachineFrameInfo &MFI = MF.getFrameInfo();
1443 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1445 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// Offset is the SP-relative candidate (object offset plus total stack size);
// FPOffset is the same slot expressed relative to the frame pointer spill
// slot.
1446 int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
1447 int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1448 bool isFixed = MFI.isFixedObjectIndex(FI);
1449
// Default answer: SP-relative, including the caller-supplied adjustment.
1450 FrameReg = ARM::SP;
1451 Offset += SPAdj;
1452
1453 // SP can move around if there are allocas. We may also lose track of SP
1454 // when emergency spilling inside a non-reserved call frame setup.
1455 bool hasMovingSP = !hasReservedCallFrame(MF);
1456
1457 // When dynamically realigning the stack, use the frame pointer for
1458 // parameters, and the stack/base pointer for locals.
1459 if (RegInfo->hasStackRealignment(MF)) {
1460 assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1461 if (isFixed) {
1462 FrameReg = RegInfo->getFrameRegister(MF);
1463 Offset = FPOffset;
1464 } else if (hasMovingSP) {
1465 assert(RegInfo->hasBasePointer(MF) &&
1466 "VLAs and dynamic stack alignment, but missing base pointer!");
1467 FrameReg = RegInfo->getBaseRegister();
// The base pointer does not move with SP, so drop the adjustment added above.
1468 Offset -= SPAdj;
1469 }
1470 return Offset;
1471 }
1472
1473 // If there is a frame pointer, use it when we can.
1474 if (hasFP(MF) && AFI->hasStackFrame()) {
1475 // Use frame pointer to reference fixed objects. Use it for locals if
1476 // there are VLAs (and thus the SP isn't reliable as a base).
1477 if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1478 FrameReg = RegInfo->getFrameRegister(MF);
1479 return FPOffset;
1480 } else if (hasMovingSP) {
1481 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
1482 if (AFI->isThumb2Function()) {
1483 // Try to use the frame pointer if we can, else use the base pointer
1484 // since it's available. This is handy for the emergency spill slot, in
1485 // particular.
1486 if (FPOffset >= -255 && FPOffset < 0) {
1487 FrameReg = RegInfo->getFrameRegister(MF);
1488 return FPOffset;
1489 }
1490 }
1491 } else if (AFI->isThumbFunction()) {
1492 // Prefer SP to base pointer, if the offset is suitably aligned and in
1493 // range as the effective range of the immediate offset is bigger when
1494 // basing off SP.
1495 // Use add <rd>, sp, #<imm8>
1496 // ldr <rd>, [sp, #<imm8>]
1497 if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1498 return Offset;
1499 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1500 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1501 if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1502 FrameReg = RegInfo->getFrameRegister(MF);
1503 return FPOffset;
1504 }
// ARM mode with a stable SP: the frame pointer wins only when the magnitude
// of FPOffset is strictly smaller than the SP-relative Offset.
1505 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1506 // Otherwise, use SP or FP, whichever is closer to the stack slot.
1507 FrameReg = RegInfo->getFrameRegister(MF);
1508 return FPOffset;
1509 }
1510 }
1511 // Use the base pointer if we have one.
1512 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1513 // That can happen if we forced a base pointer for a large call frame.
1514 if (RegInfo->hasBasePointer(MF)) {
1515 FrameReg = RegInfo->getBaseRegister();
1516 Offset -= SPAdj;
1517 }
1518 return Offset;
1519}
1520
// Emits the push instructions for the entries of CSI that Func accepts,
// walking CSI from its last entry to its first. A group of two or more
// registers (or any group when StrOpc is 0) is stored with StmOpc; a single
// register is stored with StrOpc using an immediate of -4. With NoGap set, a
// group is cut as soon as the next register number is not exactly one above
// the previous one, so several instructions may be emitted. Registers in
// [D8, D8 + NumAlignedDPRCS2Regs) are left out. MIFlags is applied to every
// emitted instruction.
// NOTE(review): this listing omits the parameter lines numbered 1522-1523 and
// several declarations and builder-chain tails inside the body.
1521void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
1524 unsigned StmOpc, unsigned StrOpc,
1525 bool NoGap, bool (*Func)(unsigned, bool),
1526 unsigned NumAlignedDPRCS2Regs,
1527 unsigned MIFlags) const {
1528 MachineFunction &MF = *MBB.getParent();
1531
1532 DebugLoc DL;
1533
1534 using RegAndKill = std::pair<unsigned, bool>;
1535
// The outer loop emits one instruction per pass; i keeps its position across
// passes, so a pass resumes where the previous group was cut.
1537 unsigned i = CSI.size();
1538 while (i != 0) {
1539 unsigned LastReg = 0;
1540 for (; i != 0; --i) {
1541 Register Reg = CSI[i-1].getReg();
1542 if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1543
1544 // D-registers in the aligned area DPRCS2 are NOT spilled here.
1545 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1546 continue;
1547
1548 const MachineRegisterInfo &MRI = MF.getRegInfo();
1549 bool isLiveIn = MRI.isLiveIn(Reg);
// Registers that are neither function live-ins nor reserved are added as
// block live-ins before being stored.
1550 if (!isLiveIn && !MRI.isReserved(Reg))
1551 MBB.addLiveIn(Reg);
1552 // If NoGap is true, push consecutive registers and then leave the rest
1553 // for other instructions. e.g.
1554 // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
1555 if (NoGap && LastReg && LastReg != Reg-1)
1556 break;
1557 LastReg = Reg;
1558 // Do not set a kill flag on values that are also marked as live-in. This
1559 // happens with the @llvm-returnaddress intrinsic and with arguments
1560 // passed in callee saved registers.
1561 // Omitting the kill flags is conservatively correct even if the live-in
1562 // is not used after all.
1563 Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1564 }
1565
1566 if (Regs.empty())
1567 continue;
1568
// Register lists are emitted in ascending encoding order.
1569 llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1570 return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1571 });
1572
1573 if (Regs.size() > 1 || StrOpc== 0) {
1574 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1575 .addReg(ARM::SP)
1576 .setMIFlags(MIFlags)
1578 for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1579 MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1580 } else if (Regs.size() == 1) {
1581 BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1582 .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1583 .addReg(ARM::SP)
1584 .setMIFlags(MIFlags)
1585 .addImm(-4)
1587 }
1588 Regs.clear();
1589
1590 // Put any subsequent vpush instructions before this one: they will refer to
1591 // higher register numbers so need to be pushed first in order to preserve
1592 // monotonicity.
1593 if (MI != MBB.begin())
1594 --MI;
1595 }
1596}
1597
// Emits the pop instructions for the entries of CSI that Func accepts,
// walking CSI from its last entry to its first. A group of two or more
// registers (or any group when LdrOpc is 0) is reloaded with LdmOpc; a single
// register is reloaded with LdrOpc. With NoGap set, a group is cut as soon as
// the next register number is not exactly one above the previous one.
// Registers in [D8, D8 + NumAlignedDPRCS2Regs) are left out.
//
// When LR is being reloaded and none of the exclusions tested below apply, LR
// is replaced by PC, LdmOpc is switched to the _RET form, and the original
// return instruction is erased after its implicit operands are copied over.
// NOTE(review): this listing omits the parameter lines numbered 1599-1600 and
// several declarations and builder-chain tails inside the body.
1598void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1601 unsigned LdmOpc, unsigned LdrOpc,
1602 bool isVarArg, bool NoGap,
1603 bool (*Func)(unsigned, bool),
1604 unsigned NumAlignedDPRCS2Regs) const {
1605 MachineFunction &MF = *MBB.getParent();
1609 bool hasPAC = AFI->shouldSignReturnAddress();
1610 DebugLoc DL;
1611 bool isTailCall = false;
1612 bool isInterrupt = false;
1613 bool isTrap = false;
1614 bool isCmseEntry = false;
// Classify the instruction at MI (when there is one) by opcode; each flag set
// here blocks the LR-to-PC folding further down.
1615 if (MBB.end() != MI) {
1616 DL = MI->getDebugLoc();
1617 unsigned RetOpcode = MI->getOpcode();
1618 isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
1619 isInterrupt =
1620 RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1621 isTrap =
1622 RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1623 RetOpcode == ARM::tTRAP;
1624 isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
1625 }
1626
// The outer loop emits one instruction per pass; i keeps its position across
// passes, so a pass resumes where the previous group was cut.
1628 unsigned i = CSI.size();
1629 while (i != 0) {
1630 unsigned LastReg = 0;
1631 bool DeleteRet = false;
1632 for (; i != 0; --i) {
1633 CalleeSavedInfo &Info = CSI[i-1];
1634 Register Reg = Info.getReg();
1635 if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1636
1637 // The aligned reloads from area DPRCS2 are not inserted here.
1638 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1639 continue;
1640 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1641 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
1642 STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1644 Reg = ARM::PC;
1645 // Fold the return instruction into the LDM.
1646 DeleteRet = true;
1647 LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1648 }
1649
1650 // If NoGap is true, pop consecutive registers and then leave the rest
1651 // for other instructions. e.g.
1652 // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1653 if (NoGap && LastReg && LastReg != Reg-1)
1654 break;
1655
1656 LastReg = Reg;
1657 Regs.push_back(Reg);
1658 }
1659
1660 if (Regs.empty())
1661 continue;
1662
// Register lists are emitted in ascending encoding order.
1663 llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1664 return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1665 });
1666
1667 if (Regs.size() > 1 || LdrOpc == 0) {
1668 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1669 .addReg(ARM::SP)
1672 for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1673 MIB.addReg(Regs[i], getDefRegState(true));
// The return was folded into the load-multiple: keep its implicit operands on
// the new instruction, then remove it.
1674 if (DeleteRet) {
1675 if (MI != MBB.end()) {
1676 MIB.copyImplicitOps(*MI);
1677 MI->eraseFromParent();
1678 }
1679 }
1680 MI = MIB;
1681 } else if (Regs.size() == 1) {
1682 // If we adjusted the reg to PC from LR above, switch it back here. We
1683 // only do that for LDM.
1684 if (Regs[0] == ARM::PC)
1685 Regs[0] = ARM::LR;
1687 BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1688 .addReg(ARM::SP, RegState::Define)
1689 .addReg(ARM::SP)
1691 // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1692 // that refactoring is complete (eventually).
1693 if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1694 MIB.addReg(0);
1696 } else
1697 MIB.addImm(4);
1698 MIB.add(predOps(ARMCC::AL));
1699 }
1700 Regs.clear();
1701
1702 // Put any subsequent vpop instructions after this one: they will refer to
1703 // higher register numbers so need to be popped afterwards.
1704 if (MI != MBB.end())
1705 ++MI;
1706 }
1707}
1708
1709/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1710/// starting from d8. Also insert stack realignment code and leave the stack
1711/// pointer pointing to the d8 spill slot.
///
/// r4 is used as the scratch address register throughout, and the last
/// instruction emitted is marked as killing it. As a side effect the function
/// info is told to restore SP from FP.
/// NOTE(review): the signature lines numbered 1712-1713 and 1715, and several
/// builder-chain tails, are not visible in this extract.
1714 unsigned NumAlignedDPRCS2Regs,
1716 const TargetRegisterInfo *TRI) {
1717 MachineFunction &MF = *MBB.getParent();
1719 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1721 MachineFrameInfo &MFI = MF.getFrameInfo();
1722
1723 // Mark the D-register spill slots as properly aligned. Since MFI computes
1724 // stack slot layout backwards, this can actually mean that the d-reg stack
1725 // slot offsets can be wrong. The offset for d8 will always be correct.
1726 for (const CalleeSavedInfo &I : CSI) {
// DNum is unsigned, so registers numbered below D8 wrap to a large value and
// are rejected by the same range check as registers past the aligned set.
1727 unsigned DNum = I.getReg() - ARM::D8;
1728 if (DNum > NumAlignedDPRCS2Regs - 1)
1729 continue;
1730 int FI = I.getFrameIdx();
1731 // The even-numbered registers will be 16-byte aligned, the odd-numbered
1732 // registers will be 8-byte aligned.
1733 MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
1734
1735 // The stack slot for D8 needs to be maximally aligned because this is
1736 // actually the point where we align the stack pointer. MachineFrameInfo
1737 // computes all offsets relative to the incoming stack pointer which is a
1738 // bit weird when realigning the stack. Any extra padding for this
1739 // over-alignment is not realized because the code inserted below adjusts
1740 // the stack pointer by numregs * 8 before aligning the stack pointer.
1741 if (DNum == 0)
1742 MFI.setObjectAlignment(FI, MFI.getMaxAlign());
1743 }
1744
1745 // Move the stack pointer to the d8 spill slot, and align it at the same
1746 // time. Leave the stack slot address in the scratch register r4.
1747 //
1748 // sub r4, sp, #numregs * 8
1749 // bic r4, r4, #align - 1
1750 // mov sp, r4
1751 //
1752 bool isThumb = AFI->isThumbFunction();
1753 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1754 AFI->setShouldRestoreSPFromFP(true);
1755
1756 // sub r4, sp, #numregs * 8
1757 // The immediate is <= 64, so it doesn't need any special encoding.
1758 unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1759 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1760 .addReg(ARM::SP)
1761 .addImm(8 * NumAlignedDPRCS2Regs)
1763 .add(condCodeOp());
1764
1765 Align MaxAlign = MF.getFrameInfo().getMaxAlign();
1766 // We must set parameter MustBeSingleInstruction to true, since
1767 // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1768 // stack alignment. Luckily, this can always be done since all ARM
1769 // architecture versions that support Neon also support the BFC
1770 // instruction.
1771 emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1772
1773 // mov sp, r4
1774 // The stack pointer must be adjusted before spilling anything, otherwise
1775 // the stack slots could be clobbered by an interrupt handler.
1776 // Leave r4 live, it is used below.
1777 Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1778 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1779 .addReg(ARM::R4)
1781 if (!isThumb)
1782 MIB.add(condCodeOp());
1783
1784 // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1785 // r4 holds the stack slot address.
// From here on NumAlignedDPRCS2Regs counts the registers still to be stored;
// each step below decrements it by the number it handled.
1786 unsigned NextReg = ARM::D8;
1787
1788 // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1789 // The writeback is only needed when emitting two vst1.64 instructions.
1790 if (NumAlignedDPRCS2Regs >= 6) {
1791 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1792 &ARM::QQPRRegClass);
1793 MBB.addLiveIn(SupReg);
1794 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1795 .addReg(ARM::R4, RegState::Kill)
1796 .addImm(16)
1797 .addReg(NextReg)
1800 NextReg += 4;
1801 NumAlignedDPRCS2Regs -= 4;
1802 }
1803
1804 // We won't modify r4 beyond this point. It currently points to the next
1805 // register to be spilled.
1806 unsigned R4BaseReg = NextReg;
1807
1808 // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1809 if (NumAlignedDPRCS2Regs >= 4) {
1810 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1811 &ARM::QQPRRegClass);
1812 MBB.addLiveIn(SupReg);
1813 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1814 .addReg(ARM::R4)
1815 .addImm(16)
1816 .addReg(NextReg)
1819 NextReg += 4;
1820 NumAlignedDPRCS2Regs -= 4;
1821 }
1822
1823 // 16-byte aligned vst1.64 with 2 d-regs.
1824 if (NumAlignedDPRCS2Regs >= 2) {
1825 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1826 &ARM::QPRRegClass);
1827 MBB.addLiveIn(SupReg);
1828 BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1829 .addReg(ARM::R4)
1830 .addImm(16)
1831 .addReg(SupReg)
1833 NextReg += 2;
1834 NumAlignedDPRCS2Regs -= 2;
1835 }
1836
1837 // Finally, use a vanilla vstr.64 for the odd last register.
1838 if (NumAlignedDPRCS2Regs) {
1839 MBB.addLiveIn(NextReg);
1840 // vstr.64 uses addrmode5 which has an offset scale of 4.
// Each D register occupies 8 bytes, so the register distance from the r4 base
// is multiplied by 2 to give the scaled immediate.
1841 BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1842 .addReg(NextReg)
1843 .addReg(ARM::R4)
1844 .addImm((NextReg - R4BaseReg) * 2)
1846 }
1847
1848 // The last spill instruction inserted should kill the scratch register r4.
1849 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1850}
1851
1852/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1853/// iterator to the following instruction.
///
/// MI is expected to point at the first of the three stack-alignment
/// instructions; the asserts check that every instruction stepped over
/// afterwards may store and that the last one kills r4.
/// NOTE(review): the signature lines numbered 1854-1855 are not visible in
/// this extract.
1856 unsigned NumAlignedDPRCS2Regs) {
1857 // sub r4, sp, #numregs * 8
1858 // bic r4, r4, #align - 1
1859 // mov sp, r4
1860 ++MI; ++MI; ++MI;
1861 assert(MI->mayStore() && "Expecting spill instruction");
1862
1863 // The cases deliberately fall through: 7 registers step over two spill
// instructions before the final one, the default case (e.g. 3, 5 or 6
// registers) steps over one, and 1, 2 or 4 registers have only the final
// spill instruction, which must carry the r4 kill flag.
1864 switch(NumAlignedDPRCS2Regs) {
1865 case 7:
1866 ++MI;
1867 assert(MI->mayStore() && "Expecting spill instruction");
1868 [[fallthrough]];
1869 default:
1870 ++MI;
1871 assert(MI->mayStore() && "Expecting spill instruction");
1872 [[fallthrough]];
1873 case 1:
1874 case 2:
1875 case 4:
1876 assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1877 ++MI;
1878 }
1879 return MI;
1880}
1881
1882/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1883/// starting from d8. These instructions are assumed to execute while the
1884/// stack is still aligned, unlike the code inserted by emitPopInst.
1887 unsigned NumAlignedDPRCS2Regs,
1889 const TargetRegisterInfo *TRI) {
1890 MachineFunction &MF = *MBB.getParent();
1892 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1894
1895 // Find the frame index assigned to d8.
1896 int D8SpillFI = 0;
1897 for (const CalleeSavedInfo &I : CSI)
1898 if (I.getReg() == ARM::D8) {
1899 D8SpillFI = I.getFrameIdx();
1900 break;
1901 }
1902
1903 // Materialize the address of the d8 spill slot into the scratch register r4.
1904 // This can be fairly complicated if the stack frame is large, so just use
1905 // the normal frame index elimination mechanism to do it. This code runs as
1906 // the initial part of the epilog where the stack and base pointers haven't
1907 // been changed yet.
1908 bool isThumb = AFI->isThumbFunction();
1909 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1910
1911 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1912 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1913 .addFrameIndex(D8SpillFI)
1914 .addImm(0)
1916 .add(condCodeOp());
1917
1918 // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1919 unsigned NextReg = ARM::D8;
1920
1921 // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1922 if (NumAlignedDPRCS2Regs >= 6) {
1923 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1924 &ARM::QQPRRegClass);
1925 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1926 .addReg(ARM::R4, RegState::Define)
1927 .addReg(ARM::R4, RegState::Kill)
1928 .addImm(16)
1931 NextReg += 4;
1932 NumAlignedDPRCS2Regs -= 4;
1933 }
1934
1935 // We won't modify r4 beyond this point. It currently points to the next
1936 // register to be spilled.
1937 unsigned R4BaseReg = NextReg;
1938
1939 // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1940 if (NumAlignedDPRCS2Regs >= 4) {
1941 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1942 &ARM::QQPRRegClass);
1943 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1944 .addReg(ARM::R4)
1945 .addImm(16)
1948 NextReg += 4;
1949 NumAlignedDPRCS2Regs -= 4;
1950 }
1951
1952 // 16-byte aligned vld1.64 with 2 d-regs.
1953 if (NumAlignedDPRCS2Regs >= 2) {
1954 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1955 &ARM::QPRRegClass);
1956 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1957 .addReg(ARM::R4)
1958 .addImm(16)
1960 NextReg += 2;
1961 NumAlignedDPRCS2Regs -= 2;
1962 }
1963
1964 // Finally, use a vanilla vldr.64 for the remaining odd register.
1965 if (NumAlignedDPRCS2Regs)
1966 BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1967 .addReg(ARM::R4)
1968 .addImm(2 * (NextReg - R4BaseReg))
1970
1971 // Last load kills r4.
1972 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1973}
1974
// NOTE(review): this excerpt is missing the function signature (listing lines
// 1975-1977) as well as listing lines 1982, 1992, 2002, 2006, 2010, 2013 and
// 2015. Presumably this is ARMFrameLowering::spillCalleeSavedRegisters --
// confirm against the upstream file before relying on the text below.
//
// Emits the code that saves the callee-saved registers described by CSI.
// Returns false without emitting anything when CSI is empty, true otherwise.
1978 if (CSI.empty())
1979 return false;
1980
1981 MachineFunction &MF = *MBB.getParent();
1983
// Pick the Thumb2 or ARM opcode for the multi-register and single-register
// GPR pushes; the D-register push uses the same opcode in both modes.
1984 unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1985 unsigned PushOneOpc = AFI->isThumbFunction() ?
1986 ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1987 unsigned FltOpc = ARM::VSTMDDB_UPD;
1988 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1989 // Compute PAC in R12.
1990 if (AFI->shouldSignReturnAddress()) {
1991 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
1993 }
1994 // Save the non-secure floating point context.
1995 if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
1996 return C.getReg() == ARM::FPCXTNS;
1997 })) {
1998 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
1999 ARM::SP)
2000 .addReg(ARM::SP)
2001 .addImm(-4)
2003 }
// With a split frame-pointer push, the D-register push (area 3) is emitted
// between two GPR pushes; otherwise the order is area 1, area 2, area 3.
2004 if (STI.splitFramePointerPush(MF)) {
2005 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
2007 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
2008 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
2009 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
2011 } else {
2012 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
2014 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
2016 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
2017 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
2018 }
2019
2020 // The code above does not insert spill code for the aligned DPRCS2 registers.
2021 // The stack realignment code will be inserted between the push instructions
2022 // and these spills.
2023 if (NumAlignedDPRCS2Regs)
2024 emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2025
2026 return true;
2027}
2028
// NOTE(review): this excerpt is missing the function signature (listing lines
// 2029-2031) as well as listing lines 2036, 2051 and 2055. Presumably this is
// ARMFrameLowering::restoreCalleeSavedRegisters -- confirm against the
// upstream file before relying on the text below.
//
// Emits the code that reloads the callee-saved registers described by CSI.
// Returns false without emitting anything when CSI is empty, true otherwise.
2032 if (CSI.empty())
2033 return false;
2034
2035 MachineFunction &MF = *MBB.getParent();
// A non-zero argument-register save area is treated as "is vararg" here.
2037 bool isVarArg = AFI->getArgRegsSaveSize() > 0;
2038 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2039
2040 // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
2041 // registers. Do that here instead.
2042 if (NumAlignedDPRCS2Regs)
2043 emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2044
// Pick the Thumb2 or ARM opcode for the multi-register and single-register
// GPR pops; the D-register pop uses the same opcode in both modes.
2045 unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
2046 unsigned LdrOpc =
2047 AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
2048 unsigned FltOpc = ARM::VLDMDIA_UPD;
// With a split frame-pointer push, the D-register pop (area 3) is emitted
// between two GPR pops; otherwise the order is area 3, area 2, area 1.
2049 if (STI.splitFramePointerPush(MF)) {
2050 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2052 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2053 NumAlignedDPRCS2Regs);
2054 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2056 } else {
2057 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2058 NumAlignedDPRCS2Regs);
2059 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2060 &isARMArea2Register, 0);
2061 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2062 &isARMArea1Register, 0);
2063 }
2064
2065 return true;
2066}
2067
2068// FIXME: Make generic?
2070 const ARMBaseInstrInfo &TII) {
2071 unsigned FnSize = 0;
2072 for (auto &MBB : MF) {
2073 for (auto &MI : MBB)
2074 FnSize += TII.getInstSizeInBytes(MI);
2075 }
2076 if (MF.getJumpTableInfo())
2077 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2078 FnSize += Table.MBBs.size() * 4;
2079 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2080 return FnSize;
2081}
2082
2083/// estimateRSStackSizeLimit - Look at each instruction that references stack
2084/// frames and return the stack size limit beyond which some of these
2085/// instructions will require a scratch register during their expansion later.
///
/// The limit starts at the 12-bit maximum (4095) and is lowered according to
/// the opcode / addressing mode of every non-debug instruction that has a
/// frame-index operand. Returns 0 as soon as an AddrMode4 or AddrMode6
/// instruction references a frame index. \p HasNonSPFrameIndex is set to true
/// when a frame-index operand's register class does not contain SP; it is
/// never reset to false here.
///
/// NOTE(review): this excerpt is missing the first signature line (listing
/// line 2087), listing line 2093, and the case labels at listing lines 2121,
/// 2126, 2129, 2133-2134, 2137, 2148, 2151 and 2154, so the addressing mode
/// that each limit below belongs to cannot be read from here.
2086// FIXME: Move to TII?
2088 const TargetFrameLowering *TFI,
2089 bool &HasNonSPFrameIndex) {
2090 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2091 const ARMBaseInstrInfo &TII =
2092 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2094 unsigned Limit = (1 << 12) - 1;
2095 for (auto &MBB : MF) {
2096 for (auto &MI : MBB) {
2097 if (MI.isDebugInstr())
2098 continue;
2099 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2100 if (!MI.getOperand(i).isFI())
2101 continue;
2102
2103 // When using ADDri to get the address of a stack object, 255 is the
2104 // largest offset guaranteed to fit in the immediate offset.
2105 if (MI.getOpcode() == ARM::ADDri) {
2106 Limit = std::min(Limit, (1U << 8) - 1);
2107 break;
2108 }
2109 // t2ADDri will not require an extra register, it can reuse the
2110 // destination.
2111 if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
2112 break;
2113
// Record frame-index operands whose register class excludes SP.
2114 const MCInstrDesc &MCID = MI.getDesc();
2115 const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
2116 if (RegClass && !RegClass->contains(ARM::SP))
2117 HasNonSPFrameIndex = true;
2118
2119 // Otherwise check the addressing mode.
2120 switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
2122 case ARMII::AddrMode2:
2123 // Default 12 bit limit.
2124 break;
2125 case ARMII::AddrMode3:
2127 Limit = std::min(Limit, (1U << 8) - 1);
2128 break;
2130 Limit = std::min(Limit, ((1U << 8) - 1) * 2);
2131 break;
2132 case ARMII::AddrMode5:
2135 Limit = std::min(Limit, ((1U << 8) - 1) * 4);
2136 break;
2138 // i12 supports only positive offset so these will be converted to
2139 // i8 opcodes. See llvm::rewriteT2FrameIndex.
2140 if (TFI->hasFP(MF) && AFI->hasStackFrame())
2141 Limit = std::min(Limit, (1U << 8) - 1);
2142 break;
2143 case ARMII::AddrMode4:
2144 case ARMII::AddrMode6:
2145 // Addressing modes 4 & 6 (load/store) instructions can't encode an
2146 // immediate offset for stack references.
2147 return 0;
2149 Limit = std::min(Limit, ((1U << 7) - 1) * 1);
2150 break;
2152 Limit = std::min(Limit, ((1U << 7) - 1) * 2);
2153 break;
2155 Limit = std::min(Limit, ((1U << 7) - 1) * 4);
2156 break;
2157 default:
2158 llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
2159 }
2160 break; // At most one FI per instruction
2161 }
2162 }
2163 }
2164
2165 return Limit;
2166}
2167
2168// In functions that realign the stack, it can be an advantage to spill the
2169// callee-saved vector registers after realigning the stack. The vst1 and vld1
2170// instructions take alignment hints that can improve performance.
//
// The count stored in ARMFunctionInfo is first reset to 0 and is only set to
// a non-zero value when none of the early returns below is taken. In that
// case R4 is also marked in SavedRegs because it serves as the scratch
// register for the vst1 / vld1 sequences.
//
// NOTE(review): this excerpt is missing the rest of the signature (listing
// line 2172) and the conditions at listing lines 2174, 2186 and 2191, so the
// exact checks guarding three of the early returns are not visible here.
2171static void
2173 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2175 return;
2176
2177 // Naked functions don't spill callee-saved registers.
2178 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
2179 return;
2180
2181 // We are planning to use NEON instructions vst1 / vld1.
2182 if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2183 return;
2184
2185 // Don't bother if the default stack alignment is sufficiently high.
2187 return;
2188
2189 // Aligned spills require stack realignment.
2190 if (!static_cast<const ARMBaseRegisterInfo *>(
2192 return;
2193
2194 // We always spill contiguous d-registers starting from d8. Count how many
2195 // need spilling. The register allocator will almost always use the
2196 // callee-saved registers in order, but it can happen that there are holes in
2197 // the range. Registers above the hole will be spilled to the standard DPRCS
2198 // area.
2199 unsigned NumSpills = 0;
2200 for (; NumSpills < 8; ++NumSpills)
2201 if (!SavedRegs.test(ARM::D8 + NumSpills))
2202 break;
2203
2204 // Don't do this for just one d-register. It's not worth it.
2205 if (NumSpills < 2)
2206 return;
2207
2208 // Spill the first NumSpills D-registers after realigning the stack.
2209 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2210
2211 // A scratch register is required for the vst1 / vld1 instructions.
2212 SavedRegs.set(ARM::R4);
2213}
2214
// NOTE(review): this excerpt is missing the function signature (listing line
// 2215) and the second half of both conditions (listing lines 2219 and 2225).
// Presumably this is ARMFrameLowering::enableShrinkWrapping -- confirm
// against the upstream file.
//
// Returns false in the two situations described below, true otherwise.
2216 // For CMSE entry functions, we want to save the FPCXT_NS immediately
2217 // upon function entry (resp. restore it immediately before return)
2218 if (STI.hasV8_1MMainlineOps() &&
2220 return false;
2221
2222 // We are disabling shrinkwrapping for now when PAC is enabled, as
2223 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2224 // generated. A follow-up patch will fix this in a more performant manner.
2226 true /* SpillsLR */))
2227 return false;
2228
2229 return true;
2230}
2231
2233 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2234 return Subtarget.createAAPCSFrameChainLeaf() ||
2235 (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls());
2236}
2237
2238// Thumb1 may require a spill when storing to a frame index through FP (or any
2239// access with execute-only), for cases where FP is a high register (R11). This
2240// scans the function for cases where this may happen.
2242 const TargetFrameLowering &TFI) {
2243 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2244 if (!AFI->isThumb1OnlyFunction())
2245 return false;
2246
2247 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
2248 for (const auto &MBB : MF)
2249 for (const auto &MI : MBB)
2250 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
2251 STI.genExecuteOnly())
2252 for (const auto &Op : MI.operands())
2253 if (Op.isFI()) {
2254 Register Reg;
2255 TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
2256 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2257 return true;
2258 }
2259 return false;
2260}
2261
2263 BitVector &SavedRegs,
2264 RegScavenger *RS) const {
// NOTE(review): this excerpt is missing the first signature line (listing
// line 2262) and listing lines 2265, 2278, 2281, 2283, 2284, 2521, 2571,
// 2580, 2633, 2677, 2740 and 2772. Presumably this is
// ARMFrameLowering::determineCalleeSaves -- confirm against the upstream file.
//
// Marks in SavedRegs the additional registers that must be saved (scratch
// registers, frame pointer, LR, alignment padding, scavenging extras),
// records whether the function has a stack frame and whether LR is spilled in
// the ARMFunctionInfo, and reserves emergency spill slots when no spare
// callee-saved register can be found.
2266 // This tells PEI to spill the FP as if it is any other callee-save register
2267 // to take advantage of the eliminateFrameIndex machinery. This also ensures it
2268 // is spilled in the order specified by getCalleeSavedRegs() to make it easier
2269 // to combine multiple loads / stores.
2270 bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));
2271 bool CS1Spilled = false;
2272 bool LRSpilled = false;
2273 unsigned NumGPRSpills = 0;
2274 unsigned NumFPRSpills = 0;
2275 SmallVector<unsigned, 4> UnspilledCS1GPRs;
2276 SmallVector<unsigned, 4> UnspilledCS2GPRs;
2277 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
2279 const ARMBaseInstrInfo &TII =
2280 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2282 MachineFrameInfo &MFI = MF.getFrameInfo();
2285 (void)TRI; // Silence unused warning in non-assert builds.
2286 Register FramePtr = RegInfo->getFrameRegister(MF);
2287
2288 // Spill R4 if Thumb2 function requires stack realignment - it will be used as
2289 // scratch register. Also spill R4 if Thumb2 function has varsized objects,
2290 // since it's not always possible to restore sp from fp in a single
2291 // instruction.
2292 // FIXME: It will be better just to find spare register here.
2293 if (AFI->isThumb2Function() &&
2294 (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
2295 SavedRegs.set(ARM::R4);
2296
2297 // If a stack probe will be emitted, spill R4 and LR, since they are
2298 // clobbered by the stack probe call.
2299 // This estimate should be a safe, conservative estimate. The actual
2300 // stack probe is enabled based on the size of the local objects;
2301 // this estimate also includes the varargs store size.
2302 if (STI.isTargetWindows() &&
2303 WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
2304 SavedRegs.set(ARM::R4);
2305 SavedRegs.set(ARM::LR);
2306 }
2307
2308 if (AFI->isThumb1OnlyFunction()) {
2309 // Spill LR if Thumb1 function uses variable length argument lists.
2310 if (AFI->getArgRegsSaveSize() > 0)
2311 SavedRegs.set(ARM::LR);
2312
2313 // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
2314 // requires stack alignment. We don't know for sure what the stack size
2315 // will be, but for this, an estimate is good enough. If anything
2316 // changes it, it'll be a spill, which implies we've used all the registers
2317 // and so R4 is already used, so not marking it here will be OK.
2318 // FIXME: It will be better just to find spare register here.
2319 if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
2320 MFI.estimateStackSize(MF) > 508)
2321 SavedRegs.set(ARM::R4);
2322 }
2323
2324 // See if we can spill vector registers to aligned stack.
2325 checkNumAlignedDPRCS2Regs(MF, SavedRegs);
2326
2327 // Spill the BasePtr if it's used.
2328 if (RegInfo->hasBasePointer(MF))
2329 SavedRegs.set(RegInfo->getBaseRegister());
2330
2331 // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
2332 if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
2333 CanEliminateFrame = false;
2334
2335 // When return address signing is enabled R12 is treated as callee-saved.
2336 if (AFI->shouldSignReturnAddress())
2337 CanEliminateFrame = false;
2338
2339 // Don't spill FP if the frame can be eliminated. This is determined
2340 // by scanning the callee-save registers to see if any is modified.
2341 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2342 for (unsigned i = 0; CSRegs[i]; ++i) {
2343 unsigned Reg = CSRegs[i];
2344 bool Spilled = false;
2345 if (SavedRegs.test(Reg)) {
2346 Spilled = true;
2347 CanEliminateFrame = false;
2348 }
2349
// Non-GPRs only contribute to NumFPRSpills, counted in 4-byte units (the
// count is multiplied by 4 when estimating the stack size below).
2350 if (!ARM::GPRRegClass.contains(Reg)) {
2351 if (Spilled) {
2352 if (ARM::SPRRegClass.contains(Reg))
2353 NumFPRSpills++;
2354 else if (ARM::DPRRegClass.contains(Reg))
2355 NumFPRSpills += 2;
2356 else if (ARM::QPRRegClass.contains(Reg))
2357 NumFPRSpills += 4;
2358 }
2359 continue;
2360 }
2361
2362 if (Spilled) {
2363 NumGPRSpills++;
2364
2365 if (!STI.splitFramePushPop(MF)) {
2366 if (Reg == ARM::LR)
2367 LRSpilled = true;
2368 CS1Spilled = true;
2369 continue;
2370 }
2371
2372 // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
2373 switch (Reg) {
2374 case ARM::LR:
2375 LRSpilled = true;
2376 [[fallthrough]];
2377 case ARM::R0: case ARM::R1:
2378 case ARM::R2: case ARM::R3:
2379 case ARM::R4: case ARM::R5:
2380 case ARM::R6: case ARM::R7:
2381 CS1Spilled = true;
2382 break;
2383 default:
2384 break;
2385 }
2386 } else {
2387 if (!STI.splitFramePushPop(MF)) {
2388 UnspilledCS1GPRs.push_back(Reg);
2389 continue;
2390 }
2391
2392 switch (Reg) {
2393 case ARM::R0: case ARM::R1:
2394 case ARM::R2: case ARM::R3:
2395 case ARM::R4: case ARM::R5:
2396 case ARM::R6: case ARM::R7:
2397 case ARM::LR:
2398 UnspilledCS1GPRs.push_back(Reg);
2399 break;
2400 default:
2401 UnspilledCS2GPRs.push_back(Reg);
2402 break;
2403 }
2404 }
2405 }
2406
2407 bool ForceLRSpill = false;
2408 if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
2409 unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
2410 // Force LR to be spilled if the Thumb function size is >= 2048. This enables
2411 // use of BL to implement far jump.
2412 if (FnSize >= (1 << 11)) {
2413 CanEliminateFrame = false;
2414 ForceLRSpill = true;
2415 }
2416 }
2417
2418 // If any of the stack slot references may be out of range of an immediate
2419 // offset, make sure a register (or a spill slot) is available for the
2420 // register scavenger. Note that if we're indexing off the frame pointer, the
2421 // effective stack size is 4 bytes larger since the FP points to the stack
2422 // slot of the previous FP. Also, if we have variable sized objects in the
2423 // function, stack slot references will often be negative, and some of
2424 // our instructions are positive-offset only, so conservatively consider
2425 // that case to want a spill slot (or register) as well. Similarly, if
2426 // the function adjusts the stack pointer during execution and the
2427 // adjustments aren't already part of our stack size estimate, our offset
2428 // calculations may be off, so be conservative.
2429 // FIXME: We could add logic to be more precise about negative offsets
2430 // and which instructions will need a scratch register for them. Is it
2431 // worth the effort and added fragility?
2432 unsigned EstimatedStackSize =
2433 MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
2434
2435 // Determine biggest (positive) SP offset in MachineFrameInfo.
2436 int MaxFixedOffset = 0;
2437 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
2438 int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
2439 MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
2440 }
2441
2442 bool HasFP = hasFP(MF);
2443 if (HasFP) {
2444 if (AFI->hasStackFrame())
2445 EstimatedStackSize += 4;
2446 } else {
2447 // If FP is not used, SP will be used to access arguments, so count the
2448 // size of arguments into the estimation.
2449 EstimatedStackSize += MaxFixedOffset;
2450 }
2451 EstimatedStackSize += 16; // For possible paddings.
2452
2453 unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
2454 bool HasNonSPFrameIndex = false;
2455 if (AFI->isThumb1OnlyFunction()) {
2456 // For Thumb1, don't bother to iterate over the function. The only
2457 // instruction that requires an emergency spill slot is a store to a
2458 // frame index.
2459 //
2460 // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
2461 // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
2462 // a 5-bit unsigned immediate.
2463 //
2464 // We could try to check if the function actually contains a tSTRspi
2465 // that might need the spill slot, but it's not really important.
2466 // Functions with VLAs or extremely large call frames are rare, and
2467 // if a function is allocating more than 1KB of stack, an extra 4-byte
2468 // slot probably isn't relevant.
2469 //
2470 // A special case is the scenario where r11 is used as FP, where accesses
2471 // to a frame index will require its value to be moved into a low reg.
2472 // This is handled later on, once we are able to determine if we have any
2473 // fp-relative accesses.
2474 if (RegInfo->hasBasePointer(MF))
2475 EstimatedRSStackSizeLimit = (1U << 5) * 4;
2476 else
2477 EstimatedRSStackSizeLimit = (1U << 8) * 4;
2478 EstimatedRSFixedSizeLimit = (1U << 5) * 4;
2479 } else {
2480 EstimatedRSStackSizeLimit =
2481 estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
2482 EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
2483 }
2484 // Final estimate of whether sp or bp-relative accesses might require
2485 // scavenging.
2486 bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
2487
2488 // If the stack pointer moves and we don't have a base pointer, the
2489 // estimate logic doesn't work. The actual offsets might be larger when
2490 // we're constructing a call frame, or we might need to use negative
2491 // offsets from fp.
2492 bool HasMovingSP = MFI.hasVarSizedObjects() ||
2493 (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
2494 bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
2495
2496 // If we have a frame pointer, we assume arguments will be accessed
2497 // relative to the frame pointer. Check whether fp-relative accesses to
2498 // arguments require scavenging.
2499 //
2500 // We could do slightly better on Thumb1; in some cases, an sp-relative
2501 // offset would be legal even though an fp-relative offset is not.
2502 int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
2503 bool HasLargeArgumentList =
2504 HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
2505
2506 bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
2507 HasLargeArgumentList || HasNonSPFrameIndex;
2508 LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
2509 << "; EstimatedStack: " << EstimatedStackSize
2510 << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
2511 << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
2512 if (BigFrameOffsets ||
2513 !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
2514 AFI->setHasStackFrame(true);
2515
2516 if (HasFP) {
2517 SavedRegs.set(FramePtr);
2518 // If the frame pointer is required by the ABI, also spill LR so that we
2519 // emit a complete frame record.
2520 if ((requiresAAPCSFrameRecord(MF) ||
2522 !LRSpilled) {
2523 SavedRegs.set(ARM::LR);
2524 LRSpilled = true;
2525 NumGPRSpills++;
2526 auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
2527 if (LRPos != UnspilledCS1GPRs.end())
2528 UnspilledCS1GPRs.erase(LRPos);
2529 }
2530 auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
2531 if (FPPos != UnspilledCS1GPRs.end())
2532 UnspilledCS1GPRs.erase(FPPos);
2533 NumGPRSpills++;
2534 if (FramePtr == ARM::R7)
2535 CS1Spilled = true;
2536 }
2537
2538 // This is the number of extra spills inserted for callee-save GPRs which
2539 // would not otherwise be used by the function. When greater than zero it
2540 // guarantees that it is possible to scavenge a register to hold the
2541 // address of a stack slot. On Thumb1, the register must be a valid operand
2542 // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
2543 // or lr.
2544 //
2545 // If we don't insert a spill, we instead allocate an emergency spill
2546 // slot, which can be used by scavenging to spill an arbitrary register.
2547 //
2548 // We currently don't try to figure out whether any specific instruction
2549 // requires scavenging an additional register.
2550 unsigned NumExtraCSSpill = 0;
2551
2552 if (AFI->isThumb1OnlyFunction()) {
2553 // For Thumb1-only targets, we need some low registers when we save and
2554 // restore the high registers (which aren't allocatable, but could be
2555 // used by inline assembly) because the push/pop instructions can not
2556 // access high registers. If necessary, we might need to push more low
2557 // registers to ensure that there is at least one free that can be used
2558 // for the saving & restoring, and preferably we should ensure that as
2559 // many as are needed are available so that fewer push/pop instructions
2560 // are required.
2561
2562 // Low registers which are not currently pushed, but could be (r4-r7).
2563 SmallVector<unsigned, 4> AvailableRegs;
2564
2565 // Unused argument registers (r0-r3) can be clobbered in the prologue for
2566 // free.
2567 int EntryRegDeficit = 0;
2568 for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
2569 if (!MF.getRegInfo().isLiveIn(Reg)) {
2570 --EntryRegDeficit;
2572 << printReg(Reg, TRI)
2573 << " is unused argument register, EntryRegDeficit = "
2574 << EntryRegDeficit << "\n");
2575 }
2576 }
2577
2578 // Unused return registers can be clobbered in the epilogue for free.
2579 int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
2581 << " return regs used, ExitRegDeficit = "
2582 << ExitRegDeficit << "\n");
2583
2584 int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
2585 LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
2586
2587 // r4-r6 can be used in the prologue if they are pushed by the first push
2588 // instruction.
2589 for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
2590 if (SavedRegs.test(Reg)) {
2591 --RegDeficit;
2592 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2593 << " is saved low register, RegDeficit = "
2594 << RegDeficit << "\n");
2595 } else {
2596 AvailableRegs.push_back(Reg);
2597 LLVM_DEBUG(
2598 dbgs()
2599 << printReg(Reg, TRI)
2600 << " is non-saved low register, adding to AvailableRegs\n");
2601 }
2602 }
2603
2604 // r7 can be used if it is not being used as the frame pointer.
2605 if (!HasFP || FramePtr != ARM::R7) {
2606 if (SavedRegs.test(ARM::R7)) {
2607 --RegDeficit;
2608 LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
2609 << RegDeficit << "\n");
2610 } else {
2611 AvailableRegs.push_back(ARM::R7);
2612 LLVM_DEBUG(
2613 dbgs()
2614 << "%r7 is non-saved low register, adding to AvailableRegs\n");
2615 }
2616 }
2617
2618 // Each of r8-r11 needs to be copied to a low register, then pushed.
2619 for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
2620 if (SavedRegs.test(Reg)) {
2621 ++RegDeficit;
2622 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2623 << " is saved high register, RegDeficit = "
2624 << RegDeficit << "\n");
2625 }
2626 }
2627
2628 // LR can only be used by PUSH, not POP, and can't be used at all if the
2629 // llvm.returnaddress intrinsic is used. This is only worth doing if we
2630 // are more limited at function entry than exit.
2631 if ((EntryRegDeficit > ExitRegDeficit) &&
2632 !(MF.getRegInfo().isLiveIn(ARM::LR) &&
2634 if (SavedRegs.test(ARM::LR)) {
2635 --RegDeficit;
2636 LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
2637 << RegDeficit << "\n");
2638 } else {
2639 AvailableRegs.push_back(ARM::LR);
2640 LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
2641 }
2642 }
2643
2644 // If there are more high registers that need pushing than low registers
2645 // available, push some more low registers so that we can use fewer push
2646 // instructions. This might not reduce RegDeficit all the way to zero,
2647 // because we can only guarantee that r4-r6 are available, but r8-r11 may
2648 // need saving.
2649 LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2650 for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2651 unsigned Reg = AvailableRegs.pop_back_val();
2652 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2653 << " to make up reg deficit\n");
2654 SavedRegs.set(Reg);
2655 NumGPRSpills++;
2656 CS1Spilled = true;
2657 assert(!MRI.isReserved(Reg) && "Should not be reserved");
2658 if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
2659 NumExtraCSSpill++;
2660 UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
2661 if (Reg == ARM::LR)
2662 LRSpilled = true;
2663 }
2664 LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2665 << "\n");
2666 }
2667
2668 // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2669 // restore LR in that case.
2670 bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2671
2672 // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
2673 // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
2674 if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2675 SavedRegs.set(ARM::LR);
2676 NumGPRSpills++;
2678 LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2679 if (LRPos != UnspilledCS1GPRs.end())
2680 UnspilledCS1GPRs.erase(LRPos);
2681
2682 ForceLRSpill = false;
2683 if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2684 !AFI->isThumb1OnlyFunction())
2685 NumExtraCSSpill++;
2686 }
2687
2688 // If stack and double are 8-byte aligned and we are spilling an odd number
2689 // of GPRs, spill one extra callee save GPR so we won't have to pad between
2690 // the integer and double callee save areas.
2691 LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2692 const Align TargetAlign = getStackAlign();
2693 if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
2694 if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
2695 for (unsigned Reg : UnspilledCS1GPRs) {
2696 // Don't spill high register if the function is thumb. In the case of
2697 // Windows on ARM, accept R11 (frame pointer)
2698 if (!AFI->isThumbFunction() ||
2699 (STI.isTargetWindows() && Reg == ARM::R11) ||
2700 isARMLowRegister(Reg) ||
2701 (Reg == ARM::LR && !ExpensiveLRRestore)) {
2702 SavedRegs.set(Reg);
2703 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2704 << " to make up alignment\n");
2705 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
2706 !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
2707 NumExtraCSSpill++;
2708 break;
2709 }
2710 }
2711 } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
2712 unsigned Reg = UnspilledCS2GPRs.front();
2713 SavedRegs.set(Reg);
2714 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2715 << " to make up alignment\n");
2716 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
2717 NumExtraCSSpill++;
2718 }
2719 }
2720
2721 // Estimate if we might need to scavenge registers at some point in order
2722 // to materialize a stack offset. If so, either spill one additional
2723 // callee-saved register or reserve a special spill slot to facilitate
2724 // register scavenging. Thumb1 needs a spill slot for stack pointer
2725 // adjustments and for frame index accesses when FP is high register,
2726 // even when the frame itself is small.
2727 unsigned RegsNeeded = 0;
2728 if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {
2729 RegsNeeded++;
2730 // With thumb1 execute-only we may need an additional register for saving
2731 // and restoring the CPSR.
2732 if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
2733 RegsNeeded++;
2734 }
2735
2736 if (RegsNeeded > NumExtraCSSpill) {
2737 // If any non-reserved CS register isn't spilled, just spill one or two
2738 // extra. That should take care of it!
2739 unsigned NumExtras = TargetAlign.value() / 4;
2741 while (NumExtras && !UnspilledCS1GPRs.empty()) {
2742 unsigned Reg = UnspilledCS1GPRs.pop_back_val();
2743 if (!MRI.isReserved(Reg) &&
2744 (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
2745 Extras.push_back(Reg);
2746 NumExtras--;
2747 }
2748 }
2749 // For non-Thumb1 functions, also check for hi-reg CS registers
2750 if (!AFI->isThumb1OnlyFunction()) {
2751 while (NumExtras && !UnspilledCS2GPRs.empty()) {
2752 unsigned Reg = UnspilledCS2GPRs.pop_back_val();
2753 if (!MRI.isReserved(Reg)) {
2754 Extras.push_back(Reg);
2755 NumExtras--;
2756 }
2757 }
2758 }
// The extras are only committed when the full quota was found.
2759 if (NumExtras == 0) {
2760 for (unsigned Reg : Extras) {
2761 SavedRegs.set(Reg);
2762 if (!MRI.isPhysRegUsed(Reg))
2763 NumExtraCSSpill++;
2764 }
2765 }
2766 while ((RegsNeeded > NumExtraCSSpill) && RS) {
2767 // Reserve a slot closest to SP or frame pointer.
2768 LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
2769 const TargetRegisterClass &RC = ARM::GPRRegClass;
2770 unsigned Size = TRI->getSpillSize(RC);
2771 Align Alignment = TRI->getSpillAlign(RC);
2773 MFI.CreateStackObject(Size, Alignment, false));
2774 --RegsNeeded;
2775 }
2776 }
2777 }
2778
// A forced LR spill is applied last; the final LR state is then recorded.
2779 if (ForceLRSpill)
2780 SavedRegs.set(ARM::LR);
2781 AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
2782}
2783
2785 MachineFrameInfo &MFI = MF.getFrameInfo();
2786 if (!MFI.isCalleeSavedInfoValid())
2787 return;
2788
2789 // Check if all terminators do not implicitly use LR. Then we can 'restore' LR
2790 // into PC so it is not live out of the return block: Clear the Restored bit
2791 // in that case.
2792 for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
2793 if (Info.getReg() != ARM::LR)
2794 continue;
2795 if (all_of(MF, [](const MachineBasicBlock &MBB) {
2796 return all_of(MBB.terminators(), [](const MachineInstr &Term) {
2797 return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
2798 Term.getOpcode() == ARM::t2LDMIA_RET ||
2799 Term.getOpcode() == ARM::tPOP_RET;
2800 });
2801 })) {
2802 Info.setRestored(false);
2803 break;
2804 }
2805 }
2806}
2807
2809 MachineFunction &MF, RegScavenger *RS) const {
// NOTE(review): this excerpt is missing the first signature line (listing
// line 2808) and listing line 2810. Presumably this is
// ARMFrameLowering::processFunctionBeforeFrameFinalized -- confirm against
// the upstream file.
//
// Refresh the Restored flag of LR's callee-saved entry.
2811 updateLRRestored(MF);
2812}
2813
2815 BitVector &SavedRegs) const {
// NOTE(review): this excerpt is missing the first signature line (listing
// line 2814) and listing line 2816. Presumably this is
// ARMFrameLowering::getCalleeSaves -- confirm against the upstream file.
2817
2818 // If we have the "returned" parameter attribute which guarantees that we
2819 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
2820 // record that fact for IPRA.
2821 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2822 if (AFI->getPreservesR0())
2823 SavedRegs.set(ARM::R0);
2824}
2825
2828 std::vector<CalleeSavedInfo> &CSI) const {
2829 // For CMSE entry functions, handle floating-point context as if it was a
2830 // callee-saved register.
2831 if (STI.hasV8_1MMainlineOps() &&
2833 CSI.emplace_back(ARM::FPCXTNS);
2834 CSI.back().setRestored(false);
2835 }
2836
2837 // For functions, which sign their return address, upon function entry, the
2838 // return address PAC is computed in R12. Treat R12 as a callee-saved register
2839 // in this case.
2840 const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
2841 if (AFI.shouldSignReturnAddress()) {
2842 // The order of register must match the order we push them, because the
2843 // PEI assigns frame indices in that order. When compiling for return
2844 // address sign and authentication, we use split push, therefore the orders
2845 // we want are:
2846 // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2847 CSI.insert(find_if(CSI,
2848 [=](const auto &CS) {
2849 Register Reg = CS.getReg();
2850 return Reg == ARM::R10 || Reg == ARM::R11 ||
2851 Reg == ARM::R8 || Reg == ARM::R9 ||
2852 ARM::DPRRegClass.contains(Reg);
2853 }),
2854 CalleeSavedInfo(ARM::R12));
2855 }
2856
2857 return false;
2858}
2859
2862 static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
2863 NumEntries = std::size(FixedSpillOffsets);
2864 return FixedSpillOffsets;
2865}
2866
2867MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2870 const ARMBaseInstrInfo &TII =
2871 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2873 bool isARM = !AFI->isThumbFunction();
2874 DebugLoc dl = I->getDebugLoc();
2875 unsigned Opc = I->getOpcode();
2876 bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
2877 unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2878
2879 assert(!AFI->isThumb1OnlyFunction() &&
2880 "This eliminateCallFramePseudoInstr does not support Thumb1!");
2881
2882 int PIdx = I->findFirstPredOperandIdx();
2883 ARMCC::CondCodes Pred = (PIdx == -1)
2884 ? ARMCC::AL
2885 : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
2886 unsigned PredReg = TII.getFramePred(*I);
2887
2888 if (!hasReservedCallFrame(MF)) {
2889 // Bail early if the callee is expected to do the adjustment.
2890 if (IsDestroy && CalleePopAmount != -1U)
2891 return MBB.erase(I);
2892
2893 // If we have alloca, convert as follows:
2894 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
2895 // ADJCALLSTACKUP -> add, sp, sp, amount
2896 unsigned Amount = TII.getFrameSize(*I);
2897 if (Amount != 0) {
2898 // We need to keep the stack aligned properly. To do this, we round the
2899 // amount of space needed for the outgoing arguments up to the next
2900 // alignment boundary.
2901 Amount = alignSPAdjust(Amount);
2902
2903 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2904 emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2905 Pred, PredReg);
2906 } else {
2907 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2908 emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2909 Pred, PredReg);
2910 }
2911 }
2912 } else if (CalleePopAmount != -1U) {
2913 // If the calling convention demands that the callee pops arguments from the
2914 // stack, we want to add it back if we have a reserved call frame.
2915 emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
2916 MachineInstr::NoFlags, Pred, PredReg);
2917 }
2918 return MBB.erase(I);
2919}
2920
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
///
/// \param Value the value to round up.
/// \returns 0 for an input of 0; otherwise \p Value rounded up so that all of
/// its set bits fit in an 8-bit window placed at an even bit position.
///
/// Note: for inputs above 0xFF000000 that have any of the low 24 bits set, the
/// rounded result does not fit in 32 bits and the return value wraps to 0.
static uint32_t alignToARMConstant(uint32_t Value) {
  unsigned Shifted = 0;

  if (Value == 0)
    return 0;

  // Normalize: shift left two bits at a time (rotations must be even) until
  // one of the two most significant bits is set.
  while (!(Value & 0xC0000000)) {
    Value = Value << 2;
    Shifted += 2;
  }

  // Keep only the top byte, rounding it up by one if any of the discarded low
  // 24 bits was set.
  bool Carry = (Value & 0x00FFFFFF);
  Value = ((Value & 0xFF000000) >> 24) + Carry;

  // The round-up may have carried out of the 8-bit field (0xFF + 1 == 0x100).
  // The mask clears the two low bits, which leaves 0x100 unchanged.
  if (Value & 0x0000100)
    Value = Value & 0x000001FC;

  // Undo the normalization: move the byte from bit 24 back to where the
  // original value's leading bits were.
  if (Shifted > 24)
    Value = Value >> (Shifted - 24);
  else
    Value = Value << (24 - Shifted);

  return Value;
}
2949
2950// The stack limit in the TCB is set to this many bytes above the actual
2951// stack limit.
2953
2954// Adjust the function prologue to enable split stacks. This currently only
2955// supports android and linux.
2956//
2957// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
2958// must be well defined in order to allow for consistent implementations of the
2959// __morestack helper function. The ABI is also not a normal ABI in that it
2960// doesn't follow the normal calling conventions because this allows the
2961// prologue of each function to be optimized further.
2962//
2963// Currently, the ABI looks like (when calling __morestack)
2964//
2965// * r4 holds the minimum stack size requested for this function call
2966// * r5 holds the stack size of the arguments to the function
2967// * the beginning of the function is 3 instructions after the call to
2968// __morestack
2969//
2970// Implementations of __morestack should use r4 to allocate a new stack, r5 to
2971// place the arguments on to the new stack, and the 3-instruction knowledge to
2972// jump directly to the body of the function when working on the new stack.
2973//
2974// An old (and possibly no longer compatible) implementation of __morestack for
2975// ARM can be found at [1].
2976//
2977// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
2979 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2980 unsigned Opcode;
2981 unsigned CFIIndex;
2982 const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2983 bool Thumb = ST->isThumb();
2984 bool Thumb2 = ST->isThumb2();
2985
2986 // Sadly, this currently doesn't support varargs or platforms other than
2987 // android/linux. Note that thumb1/thumb2 are supported for android/linux.
2988 if (MF.getFunction().isVarArg())
2989 report_fatal_error("Segmented stacks do not support vararg functions.");
2990 if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2991 report_fatal_error("Segmented stacks not supported on this platform.");
2992
2993 MachineFrameInfo &MFI = MF.getFrameInfo();
2994 MachineModuleInfo &MMI = MF.getMMI();
2995 MCContext &Context = MMI.getContext();
2996 const MCRegisterInfo *MRI = Context.getRegisterInfo();
2997 const ARMBaseInstrInfo &TII =
2998 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
3000 DebugLoc DL;
3001
3002 if (!MFI.needsSplitStackProlog())
3003 return;
3004
3005 uint64_t StackSize = MFI.getStackSize();
3006
3007 // Use R4 and R5 as scratch registers.
3008 // We save R4 and R5 before use and restore them before leaving the function.
3009 unsigned ScratchReg0 = ARM::R4;
3010 unsigned ScratchReg1 = ARM::R5;
3011 unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
3012 uint64_t AlignedStackSize;
3013
3014 MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
3015 MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
3019
3020 // Grab everything that reaches PrologueMBB to update their liveness as well.
3021 SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
3023 WalkList.push_back(&PrologueMBB);
3024
3025 do {
3026 MachineBasicBlock *CurMBB = WalkList.pop_back_val();
3027 for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
3028 if (BeforePrologueRegion.insert(PredBB).second)
3029 WalkList.push_back(PredBB);
3030 }
3031 } while (!WalkList.empty());
3032
3033 // The order in that list is important.
3034 // The blocks will all be inserted before PrologueMBB using that order.
3035 // Therefore the block that should appear first in the CFG should appear
3036 // first in the list.
3037 MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
3038 PostStackMBB};
3039
3040 for (MachineBasicBlock *B : AddedBlocks)
3041 BeforePrologueRegion.insert(B);
3042
3043 for (const auto &LI : PrologueMBB.liveins()) {
3044 for (MachineBasicBlock *PredBB : BeforePrologueRegion)
3045 PredBB->addLiveIn(LI);
3046 }
3047
3048 // Remove the newly added blocks from the list, since we know
3049 // we do not have to do the following updates for them.
3050 for (MachineBasicBlock *B : AddedBlocks) {
3051 BeforePrologueRegion.erase(B);
3052 MF.insert(PrologueMBB.getIterator(), B);
3053 }
3054
3055 for (MachineBasicBlock *MBB : BeforePrologueRegion) {
3056 // Make sure the LiveIns are still sorted and unique.
3058 // Replace the edges to PrologueMBB by edges to the sequences
3059 // we are about to add, but only update for immediate predecessors.
3060 if (MBB->isSuccessor(&PrologueMBB))
3061 MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
3062 }
3063
3064 // The required stack size that is aligned to ARM constant criterion.
3065 AlignedStackSize = alignToARMConstant(StackSize);
3066
3067 // When the frame size is less than 256 we just compare the stack
3068 // boundary directly to the value of the stack pointer, per gcc.
3069 bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
3070
3071 // We will use two of the callee save registers as scratch registers so we
3072 // need to save those registers onto the stack.
3073 // We will use SR0 to hold stack limit and SR1 to hold the stack size
3074 // requested and arguments for __morestack().
3075 // SR0: Scratch Register #0
3076 // SR1: Scratch Register #1
3077 // push {SR0, SR1}
3078 if (Thumb) {
3079 BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
3081 .addReg(ScratchReg0)
3082 .addReg(ScratchReg1);
3083 } else {
3084 BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
3085 .addReg(ARM::SP, RegState::Define)
3086 .addReg(ARM::SP)
3088 .addReg(ScratchReg0)
3089 .addReg(ScratchReg1);
3090 }
3091
3092 // Emit the relevant DWARF information about the change in stack pointer as
3093 // well as where to find both r4 and r5 (the callee-save registers)
3094 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3095 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
3096 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3097 .addCFIIndex(CFIIndex);
3099 nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
3100 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3101 .addCFIIndex(CFIIndex);
3103 nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
3104 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3105 .addCFIIndex(CFIIndex);
3106 }
3107
3108 // mov SR1, sp
3109 if (Thumb) {
3110 BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
3111 .addReg(ARM::SP)
3113 } else if (CompareStackPointer) {
3114 BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
3115 .addReg(ARM::SP)
3117 .add(condCodeOp());
3118 }
3119
3120 // sub SR1, sp, #StackSize
3121 if (!CompareStackPointer && Thumb) {
3122 if (AlignedStackSize < 256) {
3123 BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
3124 .add(condCodeOp())
3125 .addReg(ScratchReg1)
3126 .addImm(AlignedStackSize)
3128 } else {
3129 if (Thumb2 || ST->genExecuteOnly()) {
3130 BuildMI(McrMBB, DL, TII.get(MovOp), ScratchReg0)
3131 .addImm(AlignedStackSize);
3132 } else {
3133 auto MBBI = McrMBB->end();
3134 auto RegInfo = STI.getRegisterInfo();
3135 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3136 AlignedStackSize);
3137 }
3138 BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
3139 .add(condCodeOp())
3140 .addReg(ScratchReg1)
3141 .addReg(ScratchReg0)
3143 }
3144 } else if (!CompareStackPointer) {
3145 if (AlignedStackSize < 256) {
3146 BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
3147 .addReg(ARM::SP)
3148 .addImm(AlignedStackSize)
3150 .add(condCodeOp());
3151 } else {
3152 auto MBBI = McrMBB->end();
3153 auto RegInfo = STI.getRegisterInfo();
3154 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3155 AlignedStackSize);
3156 BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
3157 .addReg(ARM::SP)
3158 .addReg(ScratchReg0)
3160 .add(condCodeOp());
3161 }
3162 }
3163
3164 if (Thumb && ST->isThumb1Only()) {
3165 if (ST->genExecuteOnly()) {
3166 BuildMI(GetMBB, DL, TII.get(MovOp), ScratchReg0)
3167 .addExternalSymbol("__STACK_LIMIT");
3168 } else {
3169 unsigned PCLabelId = ARMFI->createPICLabelUId();
3171 MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
3173 unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));
3174
3175 // ldr SR0, [pc, offset(STACK_LIMIT)]
3176 BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
3179 }
3180
3181 // ldr SR0, [SR0]
3182 BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
3183 .addReg(ScratchReg0)
3184 .addImm(0)
3186 } else {
3187 // Get TLS base address from the coprocessor
3188 // mrc p15, #0, SR0, c13, c0, #3
3189 BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
3190 ScratchReg0)
3191 .addImm(15)
3192 .addImm(0)
3193 .addImm(13)
3194 .addImm(0)
3195 .addImm(3)
3197
3198 // Use the last tls slot on android and a private field of the TCB on linux.
3199 assert(ST->isTargetAndroid() || ST->isTargetLinux());
3200 unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
3201
3202 // Get the stack limit from the right offset
3203 // ldr SR0, [sr0, #4 * TlsOffset]
3204 BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
3205 ScratchReg0)
3206 .addReg(ScratchReg0)
3207 .addImm(4 * TlsOffset)
3209 }
3210
3211 // Compare stack limit with stack size requested.
3212 // cmp SR0, SR1
3213 Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
3214 BuildMI(GetMBB, DL, TII.get(Opcode))
3215 .addReg(ScratchReg0)
3216 .addReg(ScratchReg1)
3218
3219 // This jump is taken if StackLimit <= SP - stack required.
3220 Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
3221 BuildMI(GetMBB, DL, TII.get(Opcode))
3222 .addMBB(PostStackMBB)
3224 .addReg(ARM::CPSR);
3225
3226 // Calling __morestack(StackSize, Size of stack arguments).
3227 // __morestack knows that the stack size requested is in SR0(r4)
3228 // and the size of the stack arguments is in SR1(r5).
3229
3230 // Pass first argument for the __morestack by Scratch Register #0.
3231 // The amount of stack required.
3232 if (Thumb) {
3233 if (AlignedStackSize < 256) {
3234 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
3235 .add(condCodeOp())
3236 .addImm(AlignedStackSize)
3238 } else {
3239 if (Thumb2 || ST->genExecuteOnly()) {
3240 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg0)
3241 .addImm(AlignedStackSize);
3242 } else {
3243 auto MBBI = AllocMBB->end();
3244 auto RegInfo = STI.getRegisterInfo();
3245 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3246 AlignedStackSize);
3247 }
3248 }
3249 } else {
3250 if (AlignedStackSize < 256) {
3251 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
3252 .addImm(AlignedStackSize)
3254 .add(condCodeOp());
3255 } else {
3256 auto MBBI = AllocMBB->end();
3257 auto RegInfo = STI.getRegisterInfo();
3258 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3259 AlignedStackSize);
3260 }
3261 }
3262
3263 // Pass second argument for the __morestack by Scratch Register #1.
3264 // The amount of stack consumed to save function arguments.
3265 if (Thumb) {
3266 if (ARMFI->getArgumentStackSize() < 256) {
3267 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
3268 .add(condCodeOp())
3271 } else {
3272 if (Thumb2 || ST->genExecuteOnly()) {
3273 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg1)
3275 } else {
3276 auto MBBI = AllocMBB->end();
3277 auto RegInfo = STI.getRegisterInfo();
3278 RegInfo->emitLoadConstPool(
3279 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3281 }
3282 }
3283 } else {
3284 if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
3285 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
3288 .add(condCodeOp());
3289 } else {
3290 auto MBBI = AllocMBB->end();
3291 auto RegInfo = STI.getRegisterInfo();
3292 RegInfo->emitLoadConstPool(
3293 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3295 }
3296 }
3297
3298 // push {lr} - Save return address of this function.
3299 if (Thumb) {
3300 BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
3302 .addReg(ARM::LR);
3303 } else {
3304 BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
3305 .addReg(ARM::SP, RegState::Define)
3306 .addReg(ARM::SP)
3308 .addReg(ARM::LR);
3309 }
3310
3311 // Emit the DWARF info about the change in stack as well as where to find the
3312 // previous link register
3313 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3314 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
3315 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3316 .addCFIIndex(CFIIndex);
3318 nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
3319 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3320 .addCFIIndex(CFIIndex);
3321 }
3322
3323 // Call __morestack().
3324 if (Thumb) {
3325 BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
3327 .addExternalSymbol("__morestack");
3328 } else {
3329 BuildMI(AllocMBB, DL, TII.get(ARM::BL))
3330 .addExternalSymbol("__morestack");
3331 }
3332
3333 // pop {lr} - Restore return address of this original function.
3334 if (Thumb) {
3335 if (ST->isThumb1Only()) {
3336 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3338 .addReg(ScratchReg0);
3339 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
3340 .addReg(ScratchReg0)
3342 } else {
3343 BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
3344 .addReg(ARM::LR, RegState::Define)
3345 .addReg(ARM::SP, RegState::Define)
3346 .addReg(ARM::SP)
3347 .addImm(4)
3349 }
3350 } else {
3351 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3352 .addReg(ARM::SP, RegState::Define)
3353 .addReg(ARM::SP)
3355 .addReg(ARM::LR);
3356 }
3357
3358 // Restore SR0 and SR1 in case __morestack() was called.
3359 // __morestack() will skip PostStackMBB block so we need to restore
3360 // scratch registers from here.
3361 // pop {SR0, SR1}
3362 if (Thumb) {
3363 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3365 .addReg(ScratchReg0)
3366 .addReg(ScratchReg1);
3367 } else {
3368 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3369 .addReg(ARM::SP, RegState::Define)
3370 .addReg(ARM::SP)
3372 .addReg(ScratchReg0)
3373 .addReg(ScratchReg1);
3374 }
3375
3376 // Update the CFA offset now that we've popped
3377 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3378 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
3379 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3380 .addCFIIndex(CFIIndex);
3381 }
3382
3383 // Return from this function.
3384 BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
3385
3386 // Restore SR0 and SR1 in case __morestack() was not called.
3387 // pop {SR0, SR1}
3388 if (Thumb) {
3389 BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
3391 .addReg(ScratchReg0)
3392 .addReg(ScratchReg1);
3393 } else {
3394 BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
3395 .addReg(ARM::SP, RegState::Define)
3396 .addReg(ARM::SP)
3398 .addReg(ScratchReg0)
3399 .addReg(ScratchReg1);
3400 }
3401
3402 // Update the CFA offset now that we've popped
3403 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3404 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
3405 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3406 .addCFIIndex(CFIIndex);
3407
3408 // Tell debuggers that r4 and r5 are now the same as they were in the
3409 // previous function, that they're the "Same Value".
3411 nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
3412 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3413 .addCFIIndex(CFIIndex);
3415 nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
3416 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3417 .addCFIIndex(CFIIndex);
3418 }
3419
3420 // Organizing MBB lists
3421 PostStackMBB->addSuccessor(&PrologueMBB);
3422
3423 AllocMBB->addSuccessor(PostStackMBB);
3424
3425 GetMBB->addSuccessor(PostStackMBB);
3426 GetMBB->addSuccessor(AllocMBB);
3427
3428 McrMBB->addSuccessor(GetMBB);
3429
3430 PrevStackMBB->addSuccessor(McrMBB);
3431
3432#ifdef EXPENSIVE_CHECKS
3433 MF.verify();
3434#endif
3435}
unsigned const MachineRegisterInfo * MRI
static int64_t getArgumentStackToRestore(MachineFunction &MF, MachineBasicBlock &MBB)
Returns how much of the incoming argument stack area (in bytes) we should clean up in an epilogue.
static bool needsWinCFI(const MachineFunction &MF)
static unsigned estimateRSStackSizeLimit(MachineFunction &MF)
Look at each instruction that references stack frames and return the stack size limit beyond which so...
unsigned RegSize
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static bool isThumb(const MCSubtargetInfo &STI)
static MachineBasicBlock::iterator skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs)
Skip past the code inserted by emitAlignedDPRCS2Spills, and return an iterator to the following instr...
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, const TargetInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const unsigned Reg, const Align Alignment, const bool MustBeSingleInstruction)
Emit an instruction sequence that will align the address in register Reg by zero-ing out the lower bi...
static uint32_t alignToARMConstant(uint32_t Value)
Get the minimum constant for ARM that is greater than or equal to the argument.
static void checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs)
static void insertSEHRange(MachineBasicBlock &MBB, MachineBasicBlock::iterator Start, const MachineBasicBlock::iterator &End, const ARMBaseInstrInfo &TII, unsigned MIFlags)
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF, const ARMBaseInstrInfo &TII)
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI, const TargetInstrInfo &TII, unsigned Flags)
static bool canSpillOnFrameIndexAccess(const MachineFunction &MF, const TargetFrameLowering &TFI)
static bool requiresAAPCSFrameRecord(const MachineFunction &MF)
static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes)
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI, const MachineFunction &MF)
We need the offset of the frame pointer relative to other MachineFrameInfo offsets which are encoded ...
static MachineBasicBlock::iterator initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI)
static int sizeOfSPAdjustment(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
static cl::opt< bool > SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), cl::desc("Align ARM NEON spills in prolog and epilog"))
static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg, unsigned SrcReg, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
LLVMContext & Context
const char LLVMTargetMachineRef TM
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Value * RHS
Value * LHS
static const unsigned FramePtr
bool hasBasePointer(const MachineFunction &MF) const
virtual void emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred=ARMCC::AL, Register PredReg=Register(), unsigned MIFlags=MachineInstr::NoFlags) const
emitLoadConstPool - Emits a load from constpool to materialize the specified immediate.
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
bool cannotEliminateFrame(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
bool canRealignStack(const MachineFunction &MF) const override
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - Provide a base+offset reference to an FI slot for debug info.
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
ARMFrameLowering(const ARMSubtarget &sti)
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
bool keepFramePointer(const MachineFunction &MF) const override
Return true if the target wants to keep the frame pointer regardless of the function attribute "frame...
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool isFPReserved(const MachineFunction &MF) const
isFPReserved - Return true if the frame pointer register should be considered a reserved register on ...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override
Adjust the prologue to have the function use segmented stacks.
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override
getCalleeSavedSpillSlots - This method returns a pointer to an array of pairs, that contains an entry...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
int ResolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, int SPAdj) const
void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const override
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
const ARMSubtarget & STI
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
bool enableCalleeSaveSkip(const MachineFunction &MF) const override
Returns true if the target can safely skip saving callee-saved registers for noreturn nounwind functi...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
void setDPRCalleeSavedAreaSize(unsigned s)
unsigned getFPCXTSaveAreaSize() const
unsigned getGPRCalleeSavedArea1Size() const
unsigned getDPRCalleeSavedGapSize() const
void setGPRCalleeSavedArea2Size(unsigned s)
void setDPRCalleeSavedAreaOffset(unsigned o)
void setFramePtrSpillOffset(unsigned o)
unsigned getGPRCalleeSavedArea2Size() const
unsigned getNumAlignedDPRCS2Regs() const
void setGPRCalleeSavedArea1Size(unsigned s)
unsigned getArgumentStackToRestore() const
void setFPCXTSaveAreaSize(unsigned s)
unsigned getDPRCalleeSavedAreaSize() const
unsigned getFramePtrSpillOffset() const
unsigned getArgRegsSaveSize() const
void setGPRCalleeSavedArea2Offset(unsigned o)
void setGPRCalleeSavedArea1Offset(unsigned o)
void setDPRCalleeSavedGapSize(unsigned s)
unsigned getArgumentStackSize() const
unsigned getReturnRegsCount() const
bool useMovt() const
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:266
bool isTargetWindows() const
Definition: ARMSubtarget.h:378
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:278
bool splitFramePushPop(const MachineFunction &MF) const
Returns true if the frame setup is split into two separate pushes (first r0-r7,lr then r8-r11),...
Definition: ARMSubtarget.h:455
bool splitFramePointerPush(const MachineFunction &MF) const
bool isTargetELF() const
Definition: ARMSubtarget.h:381
const ARMFrameLowering * getFrameLowering() const override
Definition: ARMSubtarget.h:274
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:262
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:342
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:213
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:669
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:799
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:548
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:583
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:556
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:541
static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_same_value Current value of Register is the same as in the previous frame.
Definition: MCDwarf.h:630
Context object for machine code objects.
Definition: MCContext.h:76
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
iterator_range< livein_iterator > liveins() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Given a machine basic block that branched to 'Old', change the code and CFG so that it branches to 'N...
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool isReturnAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
int getStackProtectorIndex() const
Return the index for the stack protector object.
int getOffsetAdjustment() const
Return the correction for frame offsets.
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
void setOffsetAdjustment(int Adj)
Set the correction for frame offsets.
BitVector getPristineRegs(const MachineFunction &MF) const
Return a set of physical registers that are pristine.
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to the callee-saved info vector for the current function.
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
This class contains meta information specific to a module.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
typename SuperClass::iterator iterator
Definition: SmallVector.h:590
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
Information about stack frame layout on the target.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
Align getStackAlign() const
getStackAlign - This method returns the number of bytes to which the stack pointer must be aligne...
int alignSPAdjust(int SPAdj) const
alignSPAdjust - This method aligns the stack adjustment to the correct alignment.
virtual StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const
getFrameIndexReference - This method should return the base register and offset used to reference a f...
TargetInstrInfo - Interface to description of machine instruction set.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool hasStackRealignment(const MachineFunction &MF) const
True if stack realignment is required and still possible.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
LLVM Value Representation.
Definition: Value.h:74
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isSplitFPArea1Register(unsigned Reg, bool SplitFramePushPop)
static bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
static bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop)
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
static bool isARMLowRegister(unsigned Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1656
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1923
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1758
static bool isARMArea1Register(unsigned Reg, bool SplitFramePushPop)
isARMArea1Register - Returns true if the register is a low register (r0-r7) or a stack/pc register th...
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materialize a des...
static bool isSplitFPArea2Register(unsigned Reg, bool SplitFramePushPop)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85